3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
16 # Copyright (C) 2004-2006 The Trustees of Princeton University
18 # $Id: bwmon.py,v 1.1.2.4 2007/02/27 23:30:05 faiyaza Exp $
33 sys.path.append("/etc/planetlab")
34 from plc_config import *
36 logger.log("bwmon: Warning: Configuration file /etc/planetlab/plc_config.py not found")
37 PLC_NAME = "PlanetLab"
38 PLC_SLICE_PREFIX = "pl"
39 PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org"
40 PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org"
47 seconds_per_day = 24 * 60 * 60
53 datafile = "/var/lib/misc/bwmon.dat"
56 # Burst to line rate (or node cap). Set by NM. in KBit/s
57 default_MaxRate = int(bwlimit.get_bwcap() / 1000)
58 default_Maxi2Rate = int(bwlimit.bwmax / 1000)
62 # 5.4 GByte per day, expressed in KByte: 5.4 * 1024 * 1024 = 5662310
63 # 5.4 GByte max allowed to be transferred per recording period
64 default_MaxKByte = 5662310
65 default_ThreshKByte = int(.8 * default_MaxKByte)
66 # 16.4 GByte max allowed to be transferred per recording period to I2
67 default_Maxi2KByte = 17196646
68 default_Threshi2KByte = int(.8 * default_Maxi2KByte)
69 # Default share quanta
73 period = 1 * seconds_per_day
78 The slice %(slice)s has transmitted more than %(bytes)s from
79 %(hostname)s to %(class)s destinations
82 Its maximum %(class)s burst rate will be capped at %(new_maxrate)s/s
85 Please reduce the average %(class)s transmission rate
86 of the slice to %(limit)s per %(period)s.
92 %(date)s %(hostname)s bwcap %(slice)s
def format_bytes(bytes, si = True):
    """
    Formats a byte count into a human-readable string.

    bytes - number of bytes (int or float)
    si - if True (the default), use decimal units (1 KB = 1000 bytes);
         otherwise use binary units (1 KB = 1024 bytes)

    Returns a string such as "5.4 GB", "1.5 MB", "1.0 KB" or "999 bytes".
    """

    if si:
        kilo = 1000.
    else:
        # Officially, a kibibyte
        kilo = 1024.

    # The cut-offs are derived from the selected unit so that they agree
    # with the divisor; previously the MB and KB cut-offs were always
    # decimal, which mislabelled values when si was False.
    if bytes >= (kilo * kilo * kilo):
        return "%.1f GB" % (bytes / (kilo * kilo * kilo))
    elif bytes >= (kilo * kilo):
        return "%.1f MB" % (bytes / (kilo * kilo))
    elif bytes >= kilo:
        return "%.1f KB" % (bytes / kilo)
    else:
        return "%.0f bytes" % bytes
def format_period(seconds):
    """
    Formats a period in seconds into a string.

    Exactly one day or one hour is rendered as the bare unit name so the
    result reads naturally after "per" (e.g. "per day"). Longer spans are
    rendered with one decimal in the largest unit they exceed; anything
    of 60 seconds or less is rendered in whole seconds.
    """

    minute = 60
    hour = 60 * minute
    day = 24 * hour

    # NOTE(review): the "day"/"hour" literals were restored from context
    # (the message template prints "per %(period)s") -- confirm.
    if seconds == day:
        return "day"
    if seconds == hour:
        return "hour"
    if seconds > day:
        return "%.1f days" % (seconds / 24. / 60. / 60.)
    if seconds > hour:
        return "%.1f hours" % (seconds / 60. / 60.)
    if seconds > minute:
        return "%.1f minutes" % (seconds / 60.)
    return "%.0f seconds" % seconds
def slicemail(slice, subject, body):
    """
    Mails a notification about a slice through /usr/sbin/sendmail.

    slice - slice name; when it is neither None nor "root", the slice's
            own address (PLC_MAIL_SLICE_ADDRESS with "SLICE" replaced by
            the name) is added to the recipients
    subject - text of the Subject: header
    body - message body, written verbatim after the headers
    """

    # PLC has a separate list for pl_mom messages
    if PLC_MAIL_SUPPORT_ADDRESS == "support@planet-lab.org":
        to = ["pl-mom@planet-lab.org"]
    else:
        to = [PLC_MAIL_SUPPORT_ADDRESS]

    if slice is not None and slice != "root":
        to.append(PLC_MAIL_SLICE_ADDRESS.replace("SLICE", slice))

    # NOTE(review): the 'to' and 'subject' entries and the From/Reply-To/
    # To/Subject header lines were restored from context -- confirm
    # against the original file.
    header = {'from': "%s Support <%s>" % (PLC_NAME, PLC_MAIL_SUPPORT_ADDRESS),
              'to': ", ".join(to),
              'version': sys.version.split(" ")[0],
              'subject': subject}

    headers = ("Content-type: text/plain\n"
               "From: %(from)s\n"
               "Reply-To: %(from)s\n"
               "To: %(to)s\n"
               "X-Mailer: Python/%(version)s\n"
               "Subject: %(subject)s\n"
               "\n") % header

    # Open the pipe only once everything to be written is ready, and
    # always close it so a failed write does not leave sendmail hanging.
    sendmail = os.popen("/usr/sbin/sendmail -N never -t -f%s" % PLC_MAIL_SUPPORT_ADDRESS, "w")
    try:
        # Write headers
        sendmail.write(headers)
        # Write body
        sendmail.write(body)
    finally:
        # Done
        sendmail.close()
169 Stores the last recorded bandwidth parameters of a slice.
171 xid - slice context/VServer ID
173 time - beginning of recording period in UNIX seconds
174 bytes - low bandwidth bytes transmitted at the beginning of the recording period
175 i2bytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F)
176 MaxKByte - total volume of data allowed per recording period, in KByte
177 ThreshKByte - after this many KByte, cap the slice to (MaxKByte - bytes used)/(time left in period)
178 Maxi2KByte - same as MaxKByte, but for i2.
179 Threshi2KByte - same as ThreshKByte, but for i2.
180 MaxRate - net_max_rate slice attribute.
181 Maxi2Rate - net_i2_max_rate slice attribute.
182 emailed - did we email during this recording period
def __init__(self, xid, name, data):
    """
    Creates the bookkeeping record for one slice and installs its
    initial limits.

    xid - slice context/VServer ID
    name - slice name, matched against sliver['name'] in data
    data - dict with a 'slivers' list, as consumed by
           updateSliceAttributes()
    """
    # NOTE(review): the identity and baseline assignments below were
    # restored from context (they are read by reset(), update() and
    # GetSlivers()) -- confirm the initial values.
    self.xid = xid
    self.name = name
    self.time = 0
    self.bytes = 0
    self.i2bytes = 0

    # Start from the module-wide defaults; rates are in KBit/s and
    # volumes in KByte.
    self.MaxRate = default_MaxRate
    self.MinRate = default_MinRate
    self.Maxi2Rate = default_Maxi2Rate
    self.Mini2Rate = default_Mini2Rate
    self.MaxKByte = default_MaxKByte
    self.ThreshKByte = default_ThreshKByte
    self.Maxi2KByte = default_Maxi2KByte
    self.Threshi2KByte = default_Threshi2KByte
    self.Share = default_Share
    self.emailed = False

    # Apply per-slice overrides, then push the limits out. The KBit/s
    # values are multiplied by 1000 before being handed to bwlimit.
    self.updateSliceAttributes(data)
    bwlimit.set(xid = self.xid,
                minrate = self.MinRate * 1000,
                maxrate = self.MaxRate * 1000,
                maxexemptrate = self.Maxi2Rate * 1000,
                minexemptrate = self.Mini2Rate * 1000,
                share = self.Share)
def updateSliceAttributes(self, data):
    """
    Refreshes this slice's limits from the sliver attributes in data.

    data - dict with a 'slivers' list; each sliver is a dict with a
           'name' and an 'attributes' list of {'name': ..., 'value': ...}
           entries. Only the sliver whose name equals self.name is used.

    Every recognised attribute value is converted with int() and stored
    on the instance, and each change is logged.
    """
    # In case the limits have changed.
    if (self.MaxRate != default_MaxRate) or \
       (self.Maxi2Rate != default_Maxi2Rate):
        self.MaxRate = int(bwlimit.get_bwcap() / 1000)
        self.Maxi2Rate = int(bwlimit.bwmax / 1000)

    # attribute name -> (instance field, label used in the log line)
    plain = {
        'net_max_rate': ('MaxRate', "Max Rate"),
        'net_i2_min_rate': ('Mini2Rate', "Min i2 Rate"),
        'net_i2_max_rate': ('Maxi2Rate', "Max i2 Rate"),
        'net_max_kbyte': ('MaxKByte', "Max KByte lim"),
        'net_i2_max_kbyte': ('Maxi2KByte', "Max i2 KByte"),
        'net_thresh_kbyte': ('ThreshKByte', "Thresh KByte"),
        'net_i2_thresh_kbyte': ('Threshi2KByte', "i2 Thresh KByte"),
        'net_share': ('Share', "Net Share"),
        'net_i2_share': ('Sharei2', "Net i2 Share"),
    }

    for sliver in data['slivers']:
        if sliver['name'] != self.name:
            continue
        for attribute in sliver['attributes']:
            attrname = attribute['name']
            if attrname == 'net_min_rate':
                # To ensure min does not go above 25% of nodecap.
                ceiling = int(.25 * default_MaxRate)
                self.MinRate = min(int(attribute['value']), ceiling)
                # Logged after the assignment so the line shows the new
                # value rather than the previous one.
                logger.log("bwmon: Updating %s. Min Rate = %s" \
                           %(self.name, self.MinRate))
            elif attrname in plain:
                field, label = plain[attrname]
                setattr(self, field, int(attribute['value']))
                # Reads back the field that was just written; the i2
                # share used to be logged through a name that was never
                # assigned (self.i2Share).
                logger.log("bwmon: Updating %s. %s = %s" \
                           %(self.name, label, getattr(self, field)))
def reset(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, data):
    """
    Begin a new recording period. Remove caps by restoring limits
    to their default values.

    runningmaxrate - max rate currently in effect for the slice
    runningmaxi2rate - exempt (i2) max rate currently in effect
    usedbytes - current low bandwidth byte counter; becomes the baseline
    usedi2bytes - current i2 byte counter; becomes the baseline
    data - dict with a 'slivers' list, passed to updateSliceAttributes()
    """

    # Query Node Manager for max rate overrides
    self.updateSliceAttributes(data)

    # Reset baseline time
    self.time = time.time()

    # Reset baseline byte counts
    self.bytes = usedbytes
    self.i2bytes = usedi2bytes

    # Reset email
    # NOTE(review): restored from context (update() only mails while
    # self.emailed is False) -- confirm.
    self.emailed = False

    maxrate = self.MaxRate * 1000
    maxi2rate = self.Maxi2Rate * 1000

    # Reset rates.
    # NOTE(review): this compares the KBit/s attributes with the running
    # values, whereas update() compares the running values with rates
    # that were multiplied by 1000. The test therefore looks like it is
    # almost always true. It is left unchanged because it is also what
    # re-applies minrate and share each period -- confirm before
    # tightening it.
    if (self.MaxRate != runningmaxrate) or (self.Maxi2Rate != runningmaxi2rate):
        logger.log("bwmon: %s reset to %s/%s" % \
                   (self.name,
                    bwlimit.format_tc_rate(maxrate),
                    bwlimit.format_tc_rate(maxi2rate)))
        bwlimit.set(xid = self.xid,
                    minrate = self.MinRate * 1000,
                    maxrate = self.MaxRate * 1000,
                    maxexemptrate = self.Maxi2Rate * 1000,
                    minexemptrate = self.Mini2Rate * 1000,
                    share = self.Share)
def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, data):
    """
    Update byte counts and check if byte limits have been
    exceeded. When a threshold has been crossed, the matching burst rate
    is lowered so the remaining allowance is spread over the rest of the
    recording period, and the slice is mailed once per period.

    runningmaxrate - max rate currently in effect for the slice
    runningmaxi2rate - exempt (i2) max rate currently in effect
    usedbytes - current low bandwidth byte counter
    usedi2bytes - current i2 byte counter
    data - dict with a 'slivers' list, passed to updateSliceAttributes()
    """

    # Query Node Manager for max rate overrides
    self.updateSliceAttributes(data)

    # Prepare message parameters from the template
    message = ""
    params = {'slice': self.name, 'hostname': socket.gethostname(),
              'since': time.asctime(time.gmtime(self.time)) + " GMT",
              'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
              'date': time.asctime(time.gmtime()) + " GMT",
              'period': format_period(period)}

    # Seconds remaining in the recording period. The caller resets the
    # slice once the period has elapsed, but guard anyway so a late call
    # cannot divide by zero.
    timeleft = period - int(time.time() - self.time)

    def capped_rate(maxkbyte, bytesused, minkbit):
        # Rate that spends what is left of the allowance over the time
        # left in the period, never below the slice's minimum rate.
        floor = minkbit * 1000
        if timeleft <= 0:
            return floor
        rate = int((((maxkbyte * 1024) - bytesused) * 8) / timeleft)
        if rate < floor:
            rate = floor
        return rate

    bytesused = usedbytes - self.bytes
    if bytesused >= (self.ThreshKByte * 1024):
        new_maxrate = capped_rate(self.MaxKByte, bytesused, self.MinRate)
    else:
        new_maxrate = self.MaxRate * 1000

    # Format template parameters for low bandwidth message
    params['class'] = "low bandwidth"
    params['bytes'] = format_bytes(bytesused)
    params['limit'] = format_bytes(self.MaxKByte * 1024)
    params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)

    # NOTE(review): `verbose` and `debug` are assumed to be the
    # module-level flags defined with the other defaults -- confirm.
    if verbose:
        logger.log("bwmon: %(slice)s %(class)s " \
                   "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % \
                   params)

    # Cap low bandwidth burst rate
    if new_maxrate != runningmaxrate:
        message += template % params
        logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params)

    i2bytesused = usedi2bytes - self.i2bytes
    if i2bytesused >= (self.Threshi2KByte * 1024):
        new_maxi2rate = capped_rate(self.Maxi2KByte, i2bytesused, self.Mini2Rate)
    else:
        new_maxi2rate = self.Maxi2Rate * 1000

    # Format template parameters for high bandwidth message
    params['class'] = "high bandwidth"
    params['bytes'] = format_bytes(i2bytesused)
    params['limit'] = format_bytes(self.Maxi2KByte * 1024)
    params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxi2rate)
    # The message template and the log line below both print
    # %(new_maxrate)s, so it must carry the i2 rate here; otherwise the
    # high bandwidth text repeats the low bandwidth rate.
    params['new_maxrate'] = params['new_maxexemptrate']

    if verbose:
        logger.log("bwmon: %(slice)s %(class)s " \
                   "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % params)

    # Cap high bandwidth burst rate
    if new_maxi2rate != runningmaxi2rate:
        message += template % params
        logger.log("bwmon: %(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params)

    # Apply parameters
    if new_maxrate != runningmaxrate or new_maxi2rate != runningmaxi2rate:
        bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxi2rate)

    # Notify slice
    if message and not self.emailed:
        subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
        if debug:
            logger.log("bwmon: "+ subject)
            logger.log("bwmon: "+ message + (footer % params))
        else:
            # Remember that we mailed so the slice gets at most one
            # message per recording period.
            self.emailed = True
            slicemail(self.name, subject, message + (footer % params))
def GetSlivers(data):
    """
    Runs one monitoring pass over all slices.

    Loads the saved per-slice state from datafile, creates records for
    slivers that are new, resets or updates every slice that bwlimit
    reports, drops records for slices that are no longer live, and saves
    the state back to datafile.

    data - dict with a 'slivers' list; each sliver has a 'name' (and the
           'attributes' read by Slice.updateSliceAttributes())
    """
    # Defaults
    global datafile, \
        period, \
        default_MaxRate, \
        default_Maxi2Rate, \
        default_MinRate, \
        default_MaxKByte,\
        default_ThreshKByte,\
        default_Maxi2KByte,\
        default_Threshi2KByte,\
        default_Share,\
        verbose

    # NOTE(review): the two assignments below were restored from
    # context (`names` is tested further down) -- confirm.
    verbose = True
    # All slices
    names = []

    # In case the limits have changed.
    default_MaxRate = int(bwlimit.get_bwcap() / 1000)
    default_Maxi2Rate = int(bwlimit.bwmax / 1000)

    # In case default isn't set yet.
    if default_MaxRate == -1:
        default_MaxRate = 1000000

    # State saved by another version of this script is discarded.
    current_version = "$Id: bwmon.py,v 1.1.2.4 2007/02/27 23:30:05 faiyaza Exp $"

    slices = None
    try:
        f = open(datafile, "r+")
        try:
            logger.log("bwmon: Loading %s" % datafile)
            # NOTE(review): pickle.load() executes whatever the file
            # describes; this is only safe while datafile is writable
            # by trusted users alone.
            (version, slices) = pickle.load(f)
        finally:
            f.close()

        # Check version of data file
        if version != current_version:
            logger.log("bwmon: Not using old version '%s' data file %s" % (version, datafile))
            slices = None
    except Exception:
        # Missing or unreadable state: start from scratch.
        slices = None

    version = current_version
    if slices is None:
        slices = {}

    # Get/set special slice IDs
    root_xid = bwlimit.get_xid("root")
    default_xid = bwlimit.get_xid("default")

    if root_xid not in slices:
        slices[root_xid] = Slice(root_xid, "root", data)
        slices[root_xid].reset(0, 0, 0, 0, data)

    if default_xid not in slices:
        slices[default_xid] = Slice(default_xid, "default", data)
        slices[default_xid].reset(0, 0, 0, 0, data)

    live = {}
    # Get running slivers. {xid: name}
    for sliver in data['slivers']:
        live[bwlimit.get_xid(sliver['name'])] = sliver['name']

    # Setup new slices.
    # live.xids - running.xids = new.xids
    newslicesxids = Set(live.keys()) - Set(slices.keys())
    for newslicexid in newslicesxids:
        if newslicexid is not None:
            logger.log("bwmon: New Slice %s" % live[newslicexid])
            slices[newslicexid] = Slice(newslicexid, live[newslicexid], data)
            slices[newslicexid].reset(0, 0, 0, 0, data)
        else:
            logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslicexid])

    # Get actual running values from tc.
    # Update slice totals and bandwidth.
    for params in bwlimit.get():
        (xid, share,
         minrate, maxrate,
         minexemptrate, maxexemptrate,
         usedbytes, usedi2bytes) = params

        # Ignore root and default buckets
        if xid == root_xid or xid == default_xid:
            continue

        name = bwlimit.get_slice(xid)
        if name is None:
            # Orphaned (not associated with a slice) class
            # NOTE(review): this branch body was restored from context
            # -- confirm against the original file.
            name = "%d?" % xid
            bwlimit.off(xid)

        # Monitor only the specified slices
        if names and name not in names:
            continue

        # slices is populated from the pickle file
        # xid is populated from bwlimit (read from /etc/passwd)
        if xid in slices:
            entry = slices[xid]
            if time.time() >= (entry.time + period) or \
               usedbytes < entry.bytes or usedi2bytes < entry.i2bytes:
                # Reset to defaults every 24 hours or if it appears
                # that the byte counters have overflowed (or, more
                # likely, the node was restarted or the HTB buckets
                # were re-initialized).
                entry.reset(maxrate, maxexemptrate, usedbytes, usedi2bytes, data)
            else:
                # Update byte counts
                entry.update(maxrate, maxexemptrate, usedbytes, usedi2bytes, data)
        else:
            # Just in case. Probably (hopefully) this will never happen.
            # New slice, initialize state
            logger.log("bwmon: New Slice %s" % name)
            entry = slices[xid] = Slice(xid, name, data)
            entry.reset(maxrate, maxexemptrate, usedbytes, usedi2bytes, data)

    # Delete dead slices
    dead = Set(slices.keys()) - Set(live.keys())
    for xid in dead:
        if xid == root_xid or xid == default_xid:
            continue
        # NOTE(review): the removal below was restored from context --
        # confirm against the original file.
        del slices[xid]
        bwlimit.off(xid)

    logger.log("bwmon: Saving %s" % datafile)
    f = open(datafile, "w")
    try:
        pickle.dump((version, slices), f)
    finally:
        f.close()
509 #def GetSlivers(data):
510 # for sliver in data['slivers']:
511 # if sliver.has_key('attributes'):
513 # for attribute in sliver['attributes']:
514 # if attribute['name'] == "KByteThresh": print attribute['value']
516 def start(options, config):