3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Copyright (C) 2004-2006 The Trustees of Princeton University
17 # $Id: BandwidthMonitor.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $
# Length of one day in seconds; the default enforcement period below is built from it.
37 seconds_per_day = 24 * 60 * 60
# Default path of the pickled state file that is loaded at startup and saved again at the end of a run.
43 datafile = "/var/lib/misc/bwmon.dat"
# Fallback burst rates assigned in Slice.reset().
46 default_maxrate = bwlimit.get_bwcap()
48 default_maxexemptrate = bwlimit.bwmax
50 # 500 Kbit/s, or 5.4 GB per day
51 default_avgrate = 500000
53 # 1.5 Mbit/s, or about 16.2 GB per day (1500000 * 86400 / 8, assuming bits_per_byte is 8)
54 default_avgexemptrate = 1500000
# Interval, in seconds, over which the average rates are enforced (the -p/--period option refers to it).
57 period = 1 * seconds_per_day
62 The slice %(slice)s has transmitted more than %(bytes)s from
63 %(hostname)s to %(class)s destinations
66 Its maximum %(class)s burst rate will be capped at %(avgrate)s
69 Please reduce the average %(class)s transmission rate
70 of the slice to %(avgrate)s, or %(limit)s per %(period)s.
76 %(date)s %(hostname)s bwcap %(slice)s
81 Stores the last recorded bandwidth parameters of a slice.
83 xid - slice context/VServer ID
85 time - beginning of recording period in UNIX seconds
86 bytes - low bandwidth bytes transmitted at the beginning of the recording period
87 exemptbytes - high bandwidth bytes transmitted at the beginning of the recording period
88 avgrate - average low bandwidth rate to enforce over the recording period
89 avgexemptrate - average high bandwidth rate to enforce over the recording period
# Create the per-slice record and open its first recording period by delegating to reset().
# NOTE(review): reset() and update() read self.xid and self.name; the assignments are not
# visible in this extract -- confirm they are made before reset() is called.
92 def __init__(self, xid, name, maxrate, maxexemptrate, bytes, exemptbytes):
95 self.reset(maxrate, maxexemptrate, bytes, exemptbytes)
# Start a new recording period for this slice.
#
# maxrate, maxexemptrate - burst limits currently in effect; only compared against the newly
#                          computed limits to decide whether anything changed
# bytes, exemptbytes     - current byte counters, stored as the baseline for the new period
#
# NOTE(review): indentation and several lines (including the else branches) are missing from
# this extract, so the nesting of the statements below cannot be determined from here.
100 def reset(self, maxrate, maxexemptrate, bytes, exemptbytes):
102 Begin a new recording period. Remove caps by restoring limits
103 to their default values.
106 # Reset baseline time
107 self.time = time.time()
109 # Reset baseline byte counts
111 self.exemptbytes = exemptbytes
113 # Query Node Manager for max rate overrides
114 (new_maxrate, new_maxexemptrate) = nm.query(self.name, ['nm_net_max_rate', 'nm_net_max_exempt_rate'])
115 if new_maxrate is not None:
118 new_maxrate = default_maxrate
119 if new_maxexemptrate is not None:
# Overrides are scaled by 1000 -- presumably Node Manager reports kbit/s while bwlimit
# works in bit/s; TODO confirm.
120 new_maxexemptrate *= 1000
122 new_maxexemptrate = default_maxexemptrate
# The change is logged when either limit differs from the one currently in effect.
# NOTE(review): it is unclear from this extract whether bwlimit.set() below is also
# conditional on this test.
124 if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
125 print "%s reset to %s/%s" % \
127 bwlimit.format_tc_rate(new_maxrate),
128 bwlimit.format_tc_rate(new_maxexemptrate))
129 bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)
# Compare the bytes sent since the start of the recording period with the per-period byte
# budget and, when the budget is exhausted, lower the burst rate to the average rate.
# The check is done twice: once for ordinary ("low bandwidth") destinations and once for
# exempt ("high bandwidth") destinations.
#
# maxrate, maxexemptrate - burst limits currently in effect for the slice
# bytes, exemptbytes     - current byte counters for the slice
#
# NOTE(review): indentation and several lines are missing from this extract (else branches,
# the initialisation of `message`, the guards around the print/mail statements).
131 def update(self, maxrate, maxexemptrate, bytes, exemptbytes):
133 Update byte counts and check if average rates have been
134 exceeded. In the worst case (instantaneous usage of the entire
135 average daily byte limit at the beginning of the recording
136 period), the slice will be immediately capped and will get to
137 send twice the average daily byte limit. In the common case,
138 it will get to send slightly more than the average daily byte
142 # Query Node Manager for max average rate overrides
# NOTE(review): these are the same 'nm_net_max_*' keys that reset() queries for the burst
# rates, yet the result is stored as the *average* rates and is not scaled by 1000 as in
# reset(). This looks like a copy-paste error -- confirm the intended attribute names and
# units against Node Manager.
143 (self.avgrate, self.avgexemptrate) = nm.query(self.name, ['nm_net_max_rate', 'nm_net_max_exempt_rate'])
144 if self.avgrate is None:
145 self.avgrate = default_avgrate
146 if self.avgexemptrate is None:
147 self.avgexemptrate = default_avgexemptrate
149 # Prepare message parameters from the template
151 params = {'slice': self.name, 'hostname': socket.gethostname(),
152 'since': time.asctime(time.gmtime(self.time)) + " GMT",
153 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
154 'date': time.asctime(time.gmtime()) + " GMT",
155 'period': format_period(period)}
# Byte budget for one period at the average rate. The cap applies once the slice has sent at
# least that many bytes since the baseline and its current burst rate is still above avgrate.
157 bytelimit = self.avgrate * period / bits_per_byte
158 if bytes >= (self.bytes + bytelimit) and \
159 maxrate > self.avgrate:
160 new_maxrate = self.avgrate
162 new_maxrate = maxrate
164 # Format template parameters for low bandwidth message
165 params['class'] = "low bandwidth"
166 params['bytes'] = format_bytes(bytes - self.bytes)
167 params['maxrate'] = bwlimit.format_tc_rate(maxrate)
168 params['limit'] = format_bytes(bytelimit)
169 params['avgrate'] = bwlimit.format_tc_rate(self.avgrate)
172 print "%(slice)s %(class)s " \
173 "%(bytes)s/%(limit)s (%(maxrate)s/%(avgrate)s)" % \
176 # Cap low bandwidth burst rate
177 if new_maxrate != maxrate:
178 message += template % params
179 print "%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params
# Same budget check for traffic to exempt destinations.
181 exemptbytelimit = self.avgexemptrate * period / bits_per_byte
182 if exemptbytes >= (self.exemptbytes + exemptbytelimit) and \
183 maxexemptrate > self.avgexemptrate:
184 new_maxexemptrate = self.avgexemptrate
186 new_maxexemptrate = maxexemptrate
188 # Format template parameters for high bandwidth message
189 params['class'] = "high bandwidth"
190 params['bytes'] = format_bytes(exemptbytes - self.exemptbytes)
191 params['maxrate'] = bwlimit.format_tc_rate(maxexemptrate)
192 params['limit'] = format_bytes(exemptbytelimit)
193 params['avgrate'] = bwlimit.format_tc_rate(self.avgexemptrate)
196 print "%(slice)s %(class)s " \
197 "%(bytes)s/%(limit)s (%(maxrate)s/%(avgrate)s)" % \
200 # Cap high bandwidth burst rate
201 if new_maxexemptrate != maxexemptrate:
202 message += template % params
203 print "%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params
# Push the limits to bwlimit only when at least one of them changed.
206 if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
207 bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)
# Notification: the accumulated message plus footer is printed and/or mailed through
# slicemail(); the conditions selecting between the two are not visible in this extract.
211 subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
214 print message + (footer % params)
216 slicemail(self.name, subject, message + (footer % params))
220 Usage: %s [OPTIONS]...
223 -d, --debug Enable debugging (default: %s)
224 -v, --verbose Increase verbosity level (default: %d)
225 -f, --file=FILE Data file (default: %s)
226 -s, --slice=SLICE Constrain monitoring to these slices (default: all)
227 -p, --period=SECONDS Interval in seconds over which to enforce average byte limits (default: %s)
228 -h, --help This message
229 """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period))
# Body of the script entry point: parse options, load the saved per-slice state, walk the
# current bwlimit classes resetting or updating each monitored slice, then save the state.
# NOTE(review): the `def` line, indentation and many statements are missing from this extract.
233 global debug, verbose, datafile, period, nm
# Command-line parsing; a getopt error is reported with an "Error: " prefix.
238 longopts = ["debug", "verbose", "file=", "slice=", "period=", "help"]
239 (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:p:h", longopts)
240 except getopt.GetoptError, err:
241 print "Error: " + err.msg
245 for (opt, optval) in opts:
246 if opt == "-d" or opt == "--debug":
248 elif opt == "-v" or opt == "--verbose":
# bwlimit gets a verbosity one level below this script's own.
250 bwlimit.verbose = verbose - 1
251 elif opt == "-f" or opt == "--file":
253 elif opt == "-s" or opt == "--slice":
255 elif opt == "-p" or opt == "--period":
261 # Check if we are already running
265 # Redirect stdout and stderr to syslog
266 syslog.openlog("bwmon")
267 sys.stdout = sys.stderr = Logger()
# NOTE(review): no close() for this handle (or for the one opened for saving below) is
# visible in this extract -- confirm both are closed.
270 f = open(datafile, "r+")
272 print "Loading %s" % datafile
# NOTE(review): pickle.load() can execute arbitrary code from the file; the data file must
# only be writable by the account that runs this script.
273 (version, slices) = pickle.load(f)
275 # Check version of data file
276 if version != "$Id: bwmon.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $":
277 print "Not using old version '%s' data file %s" % (version, datafile)
280 version = "$Id: bwmon.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $"
283 # Get special slice IDs
284 root_xid = bwlimit.get_xid("root")
285 default_xid = bwlimit.get_xid("default")
287 # Open connection to Node Manager
# Each entry from bwlimit.get() is unpacked into the per-class limits and byte counters.
291 for params in bwlimit.get():
294 minexemptrate, maxexemptrate,
295 bytes, exemptbytes) = params
298 # Ignore root and default buckets
299 if xid == root_xid or xid == default_xid:
302 name = bwlimit.get_slice(xid)
304 # Orphaned (not associated with a slice) class
307 # Monitor only the specified slices
308 if names and name not in names:
311 if slices.has_key(xid):
313 if time.time() >= (slice.time + period) or \
314 bytes < slice.bytes or exemptbytes < slice.exemptbytes:
315 # Reset to defaults once per period (24 hours by default) or if it appears
316 # that the byte counters have overflowed (or, more
317 # likely, the node was restarted or the HTB buckets
318 # were re-initialized).
319 slice.reset(maxrate, maxexemptrate, bytes, exemptbytes)
322 slice.update(maxrate, maxexemptrate, bytes, exemptbytes)
324 # New slice, initialize state
325 slice = slices[xid] = Slice(xid, name, maxrate, maxexemptrate, bytes, exemptbytes)
# Saved entries whose xid is not in `live`; `live` is built in lines not visible here.
328 dead = Set(slices.keys()) - Set(live)
332 # Close connection to Node Manager
336 print "Saving %s" % datafile
337 f = open(datafile, "w")
338 pickle.dump((version, slices), f)
343 if __name__ == '__main__':