3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
16 # Copyright (C) 2004-2006 The Trustees of Princeton University
18 # $Id: bwmon.py,v 1.7 2006/07/10 19:19:07 faiyaza Exp $
38 seconds_per_day = 24 * 60 * 60
44 datafile = "/var/lib/misc/bwmon.dat"
47 default_maxrate = bwlimit.get_bwcap()
49 default_maxexemptrate = bwlimit.bwmax
51 # 500 Kbit/s, or 5.4 GB per day
52 default_avgrate = 500000
54 # 1.5 Mbit/s, or 16.2 GB per day
55 default_avgexemptrate = 1500000
58 period = 1 * seconds_per_day
63 The slice %(slice)s has transmitted more than %(bytes)s from
64 %(hostname)s to %(class)s destinations
67 Its maximum %(class)s burst rate will be capped at %(avgrate)s
70 Please reduce the average %(class)s transmission rate
71 of the slice to %(avgrate)s, or %(limit)s per %(period)s.
77 %(date)s %(hostname)s bwcap %(slice)s
82 Stores the last recorded bandwidth parameters of a slice.
84 xid - slice context/VServer ID
86 time - beginning of recording period in UNIX seconds
87 bytes - low bandwidth bytes transmitted at the beginning of the recording period
88 exemptbytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F)
89 last_avgrate - last recorded avgrate from NM
90 last_maxrate - last recorded maxrate from NM
91 last_avgexemptrate - last recorded avgexemptrate from NM
92 last_maxexemptrate - last recorded maxexemptrate from NM
95 def __init__(self, xid, name, maxrate, maxexemptrate, bytes, exemptbytes):
100 self.last_maxrate = default_maxrate
101 self.last_avgrate = default_avgrate
102 self.last_avgexemptrate = default_avgexemptrate
103 self.last_maxexemptrate = default_maxexemptrate
104 self.reset(maxrate, maxexemptrate, bytes, exemptbytes)
109 def reset(self, maxrate, maxexemptrate, bytes, exemptbytes):
111 Begin a new recording period. Remove caps by restoring limits
112 to their default values.
115 # Reset baseline time
116 self.time = time.time()
118 # Reset baseline byte counts
120 self.exemptbytes = exemptbytes
122 # If the NM query below raises an exception and new_max* doesn't get set, use the last recorded values.
123 new_maxrate = self.last_maxrate
124 new_maxexemptrate = self.last_maxexemptrate
126 # Query Node Manager for max rate overrides
128 vals = nm.query(self.name, [('nm_net_max_rate', self.last_maxrate),
129 ('nm_net_max_exempt_rate', self.last_maxexemptrate),
130 ('nm_net_avg_rate', self.last_avgrate),
131 ('nm_net_avg_exempt_rate', self.last_avgexemptrate)])
132 (new_maxrate, new_maxexemptrate,
133 self.last_avgrate, self.last_avgexemptrate) = vals
134 # NM answered: remember the returned max rates as the last known values
135 self.last_maxrate = new_maxrate
136 self.last_maxexemptrate = new_maxexemptrate
138 except Exception, err:
139 print "Warning: Exception received while querying NM:", err
141 if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
142 print "%s reset to %s/%s" % \
144 bwlimit.format_tc_rate(new_maxrate),
145 bwlimit.format_tc_rate(new_maxexemptrate))
146 bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)
148 def update(self, maxrate, maxexemptrate, bytes, exemptbytes):
150 Update byte counts and check if average rates have been
151 exceeded. In the worst case (instantaneous usage of the entire
152 average daily byte limit at the beginning of the recording
153 period), the slice will be immediately capped and will get to
154 send twice the average daily byte limit. In the common case,
155 it will get to send slightly more than the average daily byte
159 # If the NM query below raises an exception and avg*rate doesn't get set, use the last_* values.
160 avgrate = self.last_avgrate
161 avgexemptrate = self.last_avgexemptrate
163 # Query Node Manager for max average rate overrides
165 (avgrate, avgexemptrate) = nm.query(self.name,
166 [('nm_net_avg_rate', self.last_avgrate),
167 ('nm_net_avg_exempt_rate', self.last_avgexemptrate)])
168 # NM answered: remember the returned average rates as the last known values
169 self.last_avgexemptrate = avgexemptrate
170 self.last_avgrate = avgrate
171 except Exception, err:
172 print "Warning: Exception received while querying NM:", err
174 # Prepare message parameters from the template
176 params = {'slice': self.name, 'hostname': socket.gethostname(),
177 'since': time.asctime(time.gmtime(self.time)) + " GMT",
178 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
179 'date': time.asctime(time.gmtime()) + " GMT",
180 'period': format_period(period)}
182 bytelimit = avgrate * period / bits_per_byte
183 if bytes >= (self.bytes + bytelimit) and \
185 new_maxrate = avgrate
187 new_maxrate = maxrate
189 # Format template parameters for low bandwidth message
190 params['class'] = "low bandwidth"
191 params['bytes'] = format_bytes(bytes - self.bytes)
192 params['maxrate'] = bwlimit.format_tc_rate(maxrate)
193 params['limit'] = format_bytes(bytelimit)
194 params['avgrate'] = bwlimit.format_tc_rate(avgrate)
197 print "%(slice)s %(class)s " \
198 "%(bytes)s, %(limit)s (%(maxrate)s max/%(avgrate)s avg)" % \
201 # Cap low bandwidth burst rate
202 if new_maxrate != maxrate:
203 message += template % params
204 print "%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params
206 exemptbytelimit = avgexemptrate * period / bits_per_byte
207 if exemptbytes >= (self.exemptbytes + exemptbytelimit) and \
208 maxexemptrate > avgexemptrate:
209 new_maxexemptrate = avgexemptrate
211 new_maxexemptrate = maxexemptrate
213 # Format template parameters for high bandwidth message
214 params['class'] = "high bandwidth"
215 params['bytes'] = format_bytes(exemptbytes - self.exemptbytes)
216 params['maxrate'] = bwlimit.format_tc_rate(maxexemptrate)
217 params['limit'] = format_bytes(exemptbytelimit)
218 params['avgrate'] = bwlimit.format_tc_rate(avgexemptrate)
221 print "%(slice)s %(class)s " \
222 "%(bytes)s, %(limit)s (%(maxrate)s max /%(avgrate)s avg)" % \
225 # Cap high bandwidth burst rate
226 if new_maxexemptrate != maxexemptrate:
227 message += template % params
228 print "%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params
231 if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
232 bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)
236 subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
239 print message + (footer % params)
241 slicemail(self.name, subject, message + (footer % params))
245 Usage: %s [OPTIONS]...
248 -d, --debug Enable debugging (default: %s)
249 -v, --verbose Increase verbosity level (default: %d)
250 -f, --file=FILE Data file (default: %s)
251 -s, --slice=SLICE Constrain monitoring to these slices (default: all)
252 -p, --period=SECONDS Interval in seconds over which to enforce average byte limits (default: %s)
253 -h, --help This message
254 """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period))
258 global debug, verbose, datafile, period, nm
263 longopts = ["debug", "verbose", "file=", "slice=", "period=", "help"]
264 (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:p:h", longopts)
265 except getopt.GetoptError, err:
266 print "Error: " + err.msg
270 for (opt, optval) in opts:
271 if opt == "-d" or opt == "--debug":
273 elif opt == "-v" or opt == "--verbose":
275 bwlimit.verbose = verbose - 1
276 elif opt == "-f" or opt == "--file":
278 elif opt == "-s" or opt == "--slice":
280 elif opt == "-p" or opt == "--period":
286 # Check if we are already running
290 # Redirect stdout and stderr to syslog
291 syslog.openlog("bwmon")
292 sys.stdout = sys.stderr = Logger()
295 f = open(datafile, "r+")
297 print "Loading %s" % datafile
298 (version, slices) = pickle.load(f)
300 # Check version of data file
301 if version != "$Id: bwmon.py,v 1.7 2006/07/10 19:19:07 faiyaza Exp $":
302 print "Not using old version '%s' data file %s" % (version, datafile)
305 version = "$Id: bwmon.py,v 1.7 2006/07/10 19:19:07 faiyaza Exp $"
308 # Get special slice IDs
309 root_xid = bwlimit.get_xid("root")
310 default_xid = bwlimit.get_xid("default")
312 #Open connection to Node Manager
316 for params in bwlimit.get():
319 minexemptrate, maxexemptrate,
320 bytes, exemptbytes) = params
323 # Ignore root and default buckets
324 if xid == root_xid or xid == default_xid:
327 name = bwlimit.get_slice(xid)
329 # Orphaned (not associated with a slice) class
332 # Monitor only the specified slices
333 if names and name not in names:
336 #slices is populated from the pickle file
337 #xid is populated from bwlimit (read from /etc/passwd)
338 if slices.has_key(xid):
340 if time.time() >= (slice.time + period) or \
341 bytes < slice.bytes or exemptbytes < slice.exemptbytes:
342 # Reset to defaults once per period (24 hours by default) or if it appears
343 # that the byte counters have overflowed (or, more
344 # likely, the node was restarted or the HTB buckets
345 # were re-initialized).
346 slice.reset(maxrate, maxexemptrate, bytes, exemptbytes)
349 slice.update(maxrate, maxexemptrate, bytes, exemptbytes)
351 # New slice, initialize state
352 slice = slices[xid] = Slice(xid, name, maxrate, maxexemptrate, bytes, exemptbytes)
355 dead = Set(slices.keys()) - Set(live)
360 print "Saving %s" % datafile
361 f = open(datafile, "w")
362 pickle.dump((version, slices), f)
367 if __name__ == '__main__':