3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
16 # Copyright (C) 2004-2006 The Trustees of Princeton University
18 # $Id: bwmon.py,v 1.20 2007/01/10 16:51:04 faiyaza Exp $
36 seconds_per_day = 24 * 60 * 60
42 datafile = "/var/lib/misc/bwmon.dat"
45 # Burst to line rate (or node cap). Set by NM.
46 default_maxrate = bwlimit.get_bwcap()
47 default_maxi2rate = bwlimit.bwmax
50 # What we cap to when slices break the rules.
51 # 500 Kbit or 5.4 GB per day
52 #default_avgrate = 500000
53 # 1.5 Mbit or 16.4 GB per day
54 #default_avgexemptrate = 1500000
56 # 5.4 Gbyte per day, expressed in bytes: 5.4 * 1024 * 1024 * 1024
57 # 5.4 Gbyte per day max allowed transferred per recording period
58 default_ByteMax = 5798205850
59 default_ByteThresh = int(.8 * default_ByteMax)
60 # 16.4 Gbyte per day max allowed transfered per recording period to I2
61 default_ExemptByteMax = 17609365914
62 default_ExemptByteThresh = int(.8 * default_ExemptByteMax)
66 period = 1 * seconds_per_day
71 The slice %(slice)s has transmitted more than %(bytes)s from
72 %(hostname)s to %(class)s destinations
75 Its maximum %(class)s burst rate will be capped at %(new_maxrate)s/s
78 Please reduce the average %(class)s transmission rate
79 of the slice to %(limit)s per %(period)s.
85 %(date)s %(hostname)s bwcap %(slice)s
90 Stores the last recorded bandwidth parameters of a slice.
92 xid - slice context/VServer ID
94 time - beginning of recording period in UNIX seconds
95 bytes - low bandwidth bytes transmitted at the beginning of the recording period
96 i2bytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F)
97 ByteMax - total volume of data allowed
98 ByteThresh - After thresh, cap node to (maxbyte - bytes)/(time left in period)
99 ExemptByteMax - Same as above, but for i2.
100 ExemptByteThresh - i2 ByteThresh
101 maxrate - max_rate slice attribute.
102 maxexemptrate - max_exempt_rate slice attribute.
103 self.emailed = did we email during this recording period
107 def __init__(self, xid, name, maxrate, maxexemptrate, bytes, exemptbytes):
113 self.MaxRate = default_maxrate
114 self.MinRate = default_MinRate
115 self.Mini2Rate = default_MinRate
116 self.Maxi2Rate = default_maxi2rate
117 self.MaxKByte = default_ByteMax
118 self.ThreshKByte = default_ByteThresh
119 self.Maxi2KByte = default_ExemptByteMax
120 self.Threshi2KByte = default_ExemptByteThresh
123 # Get real values where applicable
124 self.reset(maxrate, maxi2rate, bytes, i2bytes)
def updateSliceAttributes(self, data):
    """
    Copy bandwidth-related sliver attributes from data onto this slice.

    data - dictionary with a 'slivers' list. Each sliver that carries
           an 'attributes' list of {'name': ..., 'value': ...} entries
           is scanned; every recognised attribute name overwrites the
           matching limit on self. Unrecognised names are ignored, and
           slivers without an 'attributes' entry are skipped.

    NOTE(review): attributes of every sliver in data are applied, not
    only those of the sliver that belongs to this slice -- confirm the
    caller passes data already filtered for this slice.
    """
    # Sliver attribute name -> instance attribute it overrides. The
    # *_kbyte entries target the same fields __init__ fills in from
    # the default_Byte* constants; previously they were written to
    # unrelated attributes (self.M / self.minrate) and silently lost.
    targets = {
        'net_min_rate': 'MinRate',
        'net_max_rate': 'MaxRate',
        'net_i2_min_rate': 'Mini2Rate',
        'net_i2_max_rate': 'Maxi2Rate',
        'net_max_kbyte': 'MaxKByte',
        'net_i2_max_kbyte': 'Maxi2KByte',
        'net_thresh_kbyte': 'ThreshKByte',
        'net_i2_thresh_kbyte': 'Threshi2KByte',
    }

    for sliver in data['slivers']:
        # A sliver with no 'attributes' key carries no overrides.
        for attribute in sliver.get('attributes', ()):
            target = targets.get(attribute['name'])
            if target is not None:
                setattr(self, target, attribute['value'])
150 def reset(self, maxrate, maxi2rate, bytes, i2bytes):
152 Begin a new recording period. Remove caps by restoring limits
153 to their default values.
156 # Query Node Manager for max rate overrides
157 self.updateSliceAttributes()
159 # Reset baseline time
160 self.time = time.time()
162 # Reset baseline byte counts
164 self.i2bytes = exemptbytes
170 if (self.MaxRate != maxrate) or (self.Maxi2Rate != maxi2rate):
171 print "%s reset to %s/%s" % \
173 bwlimit.format_tc_rate(self.MaxRate),
174 bwlimit.format_tc_rate(self.Maxi2Rate))
175 bwlimit.set(xid = self.xid, maxrate = self.MaxRate, maxexemptrate = self.Maxi2Rate)
177 def update(self, maxrate, maxi2rate, bytes, ibytes):
179 Update byte counts and check if byte limits have been
183 # Query Node Manager for max rate overrides
184 self.updateSliceAttributes()
186 # Prepare message parameters from the template
188 params = {'slice': self.name, 'hostname': socket.gethostname(),
189 'since': time.asctime(time.gmtime(self.time)) + " GMT",
190 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
191 'date': time.asctime(time.gmtime()) + " GMT",
192 'period': format_period(period)}
194 if bytes >= (self.bytes + self.ByteThresh):
196 int(((self.ByteMax - (bytes - self.bytes)) * 8)/(period - int(time.time() - self.time)))
197 if new_maxrate < default_MinRate:
198 new_maxrate = default_MinRate
200 new_maxrate = maxrate
202 # Format template parameters for low bandwidth message
203 params['class'] = "low bandwidth"
204 params['bytes'] = format_bytes(bytes - self.bytes)
205 params['maxrate'] = bwlimit.format_tc_rate(maxrate)
206 params['limit'] = format_bytes(self.ByteMax)
207 params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)
210 print "%(slice)s %(class)s " \
211 "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % \
214 # Cap low bandwidth burst rate
215 if new_maxrate != maxrate:
216 message += template % params
217 print "%(slice)s %(class)s capped at %(new_maxrate)s/s " % params
219 if exemptbytes >= (self.exemptbytes + self.ExemptByteThresh):
220 new_maxexemptrate = \
221 int(((self.ExemptByteMax - (self.bytes - bytes)) * 8)/(period - int(time.time() - self.time)))
222 if new_maxexemptrate < default_MinRate:
223 new_maxexemptrate = default_MinRate
225 new_maxexemptrate = maxexemptrate
227 # Format template parameters for high bandwidth message
228 params['class'] = "high bandwidth"
229 params['bytes'] = format_bytes(exemptbytes - self.exemptbytes)
230 params['maxrate'] = bwlimit.format_tc_rate(maxexemptrate)
231 params['limit'] = format_bytes(self.ExemptByteMax)
232 params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxexemptrate)
235 print "%(slice)s %(class)s " \
236 "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % params
238 # Cap high bandwidth burst rate
239 if new_maxexemptrate != maxexemptrate:
240 message += template % params
241 print "%(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params
244 if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate:
245 bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate)
248 if message and self.emailed == False:
249 subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
252 print message + (footer % params)
255 slicemail(self.name, subject, message + (footer % params))
259 global datafile, period
262 # Check if we are already running
266 f = open(datafile, "r+")
268 print "Loading %s" % datafile
269 (version, slices) = pickle.load(f)
271 # Check version of data file
272 if version != "$Id: bwmon.py,v 1.20 2007/01/10 16:51:04 faiyaza Exp $":
273 print "Not using old version '%s' data file %s" % (version, datafile)
276 version = "$Id: bwmon.py,v 1.20 2007/01/10 16:51:04 faiyaza Exp $"
279 # Get special slice IDs
280 root_xid = bwlimit.get_xid("root")
281 default_xid = bwlimit.get_xid("default")
284 # Get actual running values from tc.
285 for params in bwlimit.get():
288 minexemptrate, maxexemptrate,
289 bytes, i2bytes) = params
292 # Ignore root and default buckets
293 if xid == root_xid or xid == default_xid:
296 name = bwlimit.get_slice(xid)
298 # Orphaned (not associated with a slice) class
301 # Monitor only the specified slices
302 if names and name not in names:
304 #slices is populated from the pickle file
305 #xid is populated from bwlimit (read from /etc/passwd)
306 if slices.has_key(xid):
308 if time.time() >= (slice.time + period) or \
309 bytes < slice.bytes or i2bytes < slice.i2bytes:
310 # Reset to defaults every 24 hours or if it appears
311 # that the byte counters have overflowed (or, more
312 # likely, the node was restarted or the HTB buckets
313 # were re-initialized).
314 slice.reset(maxrate, maxexemptrate, bytes, exemptbytes)
317 slice.update(maxrate, maxexemptrate, bytes, exemptbytes)
319 # New slice, initialize state
320 slice = slices[xid] = Slice(xid, name, maxrate, maxexemptrate, bytes, exemptbytes)
323 dead = Set(slices.keys()) - Set(live)
328 print "Saving %s" % datafile
329 f = open(datafile, "w")
330 pickle.dump((version, slices), f)
335 def GetSlivers(data):
336 for sliver in data['slivers']:
337 if sliver.has_key('attributes'):
339 for attribute in sliver['attributes']:
340 if attribute['name'] == "KByteThresh": print attribute['value']
342 def start(options, config):
346 if __name__ == '__main__':