3 # Average bandwidth monitoring script. Run periodically via cron(8) to
4 # enforce a soft limit on daily bandwidth usage for each slice. If a
5 # slice is found to have exceeded its daily bandwidth usage when the
6 # script is run, its instantaneous rate will be capped at the desired
7 # average rate. Thus, in the worst case, a slice will only be able to
8 # send a little more than twice its average daily limit.
10 # Two separate limits are enforced, one for destinations exempt from
11 # the node bandwidth cap, and the other for all other destinations.
13 # Mark Huang <mlhuang@cs.princeton.edu>
14 # Andy Bavier <acb@cs.princeton.edu>
15 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
16 # Copyright (C) 2004-2006 The Trustees of Princeton University
18 # $Id: bwmon.py,v 1.18 2007/04/25 22:19:59 faiyaza Exp $
# Make the PlanetLab configuration directory importable and pull in its
# settings.
33 sys.path.append("/etc/planetlab")
34 from plc_config import *
# Fallback settings that accompany the "not found" warning below.
# NOTE(review): presumably these run only when the import above fails; the
# surrounding try/except lines are not visible here -- confirm.
36 logger.log("bwmon: Warning: Configuration file /etc/planetlab/plc_config.py not found")
37 PLC_NAME = "PlanetLab"
38 PLC_SLICE_PREFIX = "pl"
39 PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org"
40 PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org"
43 seconds_per_day = 24 * 60 * 60
# File the monitoring pass loads with pickle.load and rewrites with
# pickle.dump as a (version, slices) tuple.
49 datafile = "/var/lib/misc/bwmon.dat"
52 # Burst to line rate (or node cap). Set by NM. In KBit/s.
53 default_MaxRate = int(bwlimit.get_bwcap() / 1000)
54 default_Maxi2Rate = int(bwlimit.bwmax / 1000)
58 # 5.4 GByte per day, expressed in KByte: 5.4 * 1024 * 1024 = 5662310
59 # 5.4 GByte per day max allowed transferred per recording period
60 default_MaxKByte = 5662310
# The threshold defaults to 80% of the corresponding maximum.
61 default_ThreshKByte = int(.8 * default_MaxKByte)
62 # 16.4 GByte per day max allowed transferred per recording period to I2
63 default_Maxi2KByte = 17196646
64 default_Threshi2KByte = int(.8 * default_Maxi2KByte)
65 # Default share quanta
# Length of one recording period, in seconds (one day).
69 period = 1 * seconds_per_day
74 The slice %(slice)s has transmitted more than %(bytes)s from
75 %(hostname)s to %(class)s destinations
78 Its maximum %(class)s burst rate will be capped at %(new_maxrate)s/s
81 Please reduce the average %(class)s transmission rate
82 of the slice to %(limit)s per %(period)s.
88 %(date)s %(hostname)s bwcap %(slice)s
def format_bytes(bytes, si = True):
    """
    Formats a byte count into a human-readable string.

    bytes - number of bytes to format
    si - if True (the default) use decimal multiples (1 KB = 1000 bytes);
         otherwise use binary multiples (1 KB = 1024 bytes)

    Returns a string such as "1.5 KB", "2.5 MB" or "3.0 GB"; counts
    smaller than one kilo unit are rendered as whole bytes.
    """
    if si:
        kilo = 1000.
    else:
        # Officially, a kibibyte
        kilo = 1024.
    mega = kilo * kilo
    giga = mega * kilo

    # Every threshold is derived from the same unit as its divisor, so
    # si=False no longer mixes decimal cut-offs with binary divisors.
    if bytes >= giga:
        return "%.1f GB" % (bytes / giga)
    elif bytes >= mega:
        return "%.1f MB" % (bytes / mega)
    elif bytes >= kilo:
        return "%.1f KB" % (bytes / kilo)
    else:
        return "%.0f bytes" % bytes
def format_period(seconds):
    """
    Formats a period in seconds into a string

    seconds - length of the period, in seconds

    Exactly one day or one hour is rendered as a bare unit name so the
    result reads naturally after "per"; any other length is rendered in
    the largest unit it exceeds, with one decimal place (whole numbers
    for seconds).
    """
    hour = 60 * 60
    day = 24 * hour

    # NOTE(review): the return values of the two exact-match branches and
    # the "> 60" minutes cut-off were not present in the reviewed text;
    # they are restored here to match the "per %(period)s" usage and the
    # strict ">" used by the days/hours branches -- confirm.
    if seconds == day:
        return "day"
    elif seconds == hour:
        return "hour"
    elif seconds > day:
        return "%.1f days" % (seconds / 24. / 60. / 60.)
    elif seconds > hour:
        return "%.1f hours" % (seconds / 60. / 60.)
    elif seconds > 60:
        return "%.1f minutes" % (seconds / 60.)
    else:
        return "%.0f seconds" % seconds
# Send a notification mail about a slice by piping a message to
# /usr/sbin/sendmail.
#   slice   - slice name; None and "root" get no per-slice recipient
#   subject - subject text supplied by the caller
#   body    - message body supplied by the caller
# NOTE(review): the remainder of the header dict, most of the header text
# and the end of the function are not visible here; the comments below
# cover the visible lines only.
128 def slicemail(slice, subject, body):
# The sender address is interpolated straight into the command line that
# os.popen runs; it comes from configuration, not from the slice.
129 sendmail = os.popen("/usr/sbin/sendmail -N never -t -f%s" % PLC_MAIL_SUPPORT_ADDRESS, "w")
131 # PLC has a separate list for pl_mom messages
132 if PLC_MAIL_SUPPORT_ADDRESS == "support@planet-lab.org":
133 to = ["pl-mom@planet-lab.org"]
135 to = [PLC_MAIL_SUPPORT_ADDRESS]
# Also address the slice itself: its address is derived by substituting the
# slice name for the "SLICE" placeholder.
137 if slice is not None and slice != "root":
138 to.append(PLC_MAIL_SLICE_ADDRESS.replace("SLICE", slice))
# Values interpolated into the mail header text below.
140 header = {'from': "%s Support <%s>" % (PLC_NAME, PLC_MAIL_SUPPORT_ADDRESS),
142 'version': sys.version.split(" ")[0],
148 Content-type: text/plain
152 X-Mailer: Python/%(version)s
155 """.lstrip() % header)
165 Stores the last recorded bandwidth parameters of a slice.
167 xid - slice context/VServer ID
169 time - beginning of recording period in UNIX seconds
170 bytes - low bandwidth bytes transmitted at the beginning of the recording period
171 i2bytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F)
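172 MaxKByte - total volume of data (in KByte) allowed per recording period
173 ThreshKByte - once usage passes this threshold (in KByte), cap the rate to (MaxKByte - bytes used)/(time left in period)
174 Maxi2KByte - same as MaxKByte, but for i2 (exempt) destinations
175 Threshi2KByte - same as ThreshKByte, but for i2 (exempt) destinations
176 MaxRate - maximum rate, taken from the net_max_rate rspec value
177 Maxi2Rate - maximum i2 (exempt) rate, taken from the net_i2_max_rate rspec value
178 emailed - whether we already emailed during this recording period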
# Create the bookkeeping record for one slice and push its initial limits
# to bwlimit.
#   xid   - slice context/VServer ID
#   name  - slice name
#   rspec - dict of per-slice settings, passed on to updateSliceAttributes
# NOTE(review): some assignments and the tail of the bwlimit.set call are
# not visible here.
182 def __init__(self, xid, name, rspec):
# Start from the module-level defaults ...
188 self.MaxRate = default_MaxRate
189 self.MinRate = default_MinRate
190 self.Maxi2Rate = default_Maxi2Rate
191 self.Mini2Rate = default_Mini2Rate
192 self.MaxKByte = default_MaxKByte
193 self.ThreshKByte = default_ThreshKByte
194 self.Maxi2KByte = default_Maxi2KByte
195 self.Threshi2KByte = default_Threshi2KByte
196 self.Share = default_Share
197 self.Sharei2 = default_Share
# ... then let values from rspec replace them.
200 self.updateSliceAttributes(rspec)
# Rates are stored divided by 1000 (see the defaults), so they are
# multiplied back before being handed to bwlimit.
201 bwlimit.set(xid = self.xid,
202 minrate = self.MinRate * 1000,
203 maxrate = self.MaxRate * 1000,
204 maxexemptrate = self.Maxi2Rate * 1000,
205 minexemptrate = self.Mini2Rate * 1000,
def updateSliceAttributes(self, rspec):
    """
    Refresh the slice's rate, byte-limit and share settings from rspec.

    rspec - dict of per-slice settings. A missing key falls back to the
            matching module default, except the two max rates, which
            fall back to bwlimit.get_bwcap() / 1000 and
            bwlimit.bwmax / 1000.

    A setting is stored, and the change logged, only when the new value
    differs from the one currently held. Returns None.
    """
    def refresh(attr, label, value):
        # Store and log a setting only when it actually changed.
        if value != getattr(self, attr):
            setattr(self, attr, value)
            logger.log("bwmon: Updating %s: %s = %s" % (self.name, label, value))

    # Sanity check plus policy decision for MinRate:
    # MinRate can't be greater than 25% of MaxRate or NodeCap.
    MinRate = int(rspec.get("net_min_rate", default_MinRate))
    refresh("MinRate", "Min Rate", min(MinRate, int(.25 * default_MaxRate)))

    refresh("MaxRate", "Max Rate",
            int(rspec.get('net_max_rate', bwlimit.get_bwcap() / 1000)))
    refresh("Mini2Rate", "Min i2 Rate",
            int(rspec.get('net_i2_min_rate', default_Mini2Rate)))
    refresh("Maxi2Rate", "Max i2 Rate",
            int(rspec.get('net_i2_max_rate', bwlimit.bwmax / 1000)))

    refresh("MaxKByte", "Max KByte lim",
            int(rspec.get('net_max_kbyte', default_MaxKByte)))
    refresh("Maxi2KByte", "Max i2 KByte",
            int(rspec.get('net_i2_max_kbyte', default_Maxi2KByte)))
    refresh("ThreshKByte", "Thresh KByte",
            int(rspec.get('net_thresh_kbyte', default_ThreshKByte)))
    refresh("Threshi2KByte", "i2 Thresh KByte",
            int(rspec.get('net_i2_thresh_kbyte', default_Threshi2KByte)))

    # The share value is stored before it is logged, and the i2 share is
    # read back from Sharei2 (logging the unset self.i2Share raised
    # AttributeError whenever the i2 share changed).
    refresh("Share", "Net Share",
            int(rspec.get('net_share', default_Share)))
    refresh("Sharei2", "Net i2 Share",
            int(rspec.get('net_i2_share', default_Share)))
# Parameters: runningmaxrate / runningmaxi2rate are the limits currently in
# force, usedbytes / usedi2bytes become the new byte-count baselines, and
# rspec is passed on to updateSliceAttributes.
# NOTE(review): a few lines of this method are not visible here.
269 def reset(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec):
271 Begin a new recording period. Remove caps by restoring limits
272 to their default values.
275 # Query Node Manager for max rate overrides
276 self.updateSliceAttributes(rspec)
278 # Reset baseline time
279 self.time = time.time()
281 # Reset baseline byte counts
282 self.bytes = usedbytes
283 self.i2bytes = usedi2bytes
# Stored rates are scaled by 1000 before being formatted for the log.
287 maxrate = self.MaxRate * 1000
288 maxi2rate = self.Maxi2Rate * 1000
# NOTE(review): this compares the unscaled self.MaxRate / self.Maxi2Rate
# with the running rates, whereas update() compares the running rates with
# values scaled by 1000 -- looks like a unit mismatch; confirm.
290 if (self.MaxRate != runningmaxrate) or (self.Maxi2Rate != runningmaxi2rate):
291 logger.log("bwmon: %s reset to %s/%s" % \
293 bwlimit.format_tc_rate(maxrate),
294 bwlimit.format_tc_rate(maxi2rate)))
# Re-apply the slice's uncapped limits.
295 bwlimit.set(xid = self.xid,
296 minrate = self.MinRate * 1000,
297 maxrate = self.MaxRate * 1000,
298 maxexemptrate = self.Maxi2Rate * 1000,
299 minexemptrate = self.Mini2Rate * 1000,
# Parameters: runningmaxrate / runningmaxi2rate are the limits currently in
# force, usedbytes / usedi2bytes are the current byte counters, and rspec is
# passed on to updateSliceAttributes.
# NOTE(review): several lines of this method (including any else/verbose/
# debug guards) are not visible here; the comments cover visible lines only.
302 def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec):
304 Update byte counts and check if byte limits have been
308 # Query Node Manager for max rate overrides
309 self.updateSliceAttributes(rspec)
311 # Prepare message parameters from the template
313 params = {'slice': self.name, 'hostname': socket.gethostname(),
314 'since': time.asctime(time.gmtime(self.time)) + " GMT",
315 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
316 'date': time.asctime(time.gmtime()) + " GMT",
317 'period': format_period(period)}
# Low bandwidth class: once usage since the baseline reaches the threshold,
# derive a new max rate from the remaining byte budget and remaining time.
319 if usedbytes >= (self.bytes + (self.ThreshKByte * 1024)):
321 logger.log("bwmon: %s over thresh %s" \
322 % (self.name, format_bytes(self.ThreshKByte * 1024)))
# NOTE(review): `sum` is not used in the visible lines and shadows the
# builtin of the same name.
323 sum = self.bytes + (self.ThreshKByte * 1024)
324 maxbyte = self.MaxKByte * 1024
325 bytesused = usedbytes - self.bytes
326 timeused = int(time.time() - self.time)
# Remaining bytes * 8 spread over the remaining seconds of the period.
# NOTE(review): the divisor is zero when timeused == period -- confirm the
# caller always resets before that can happen.
327 new_maxrate = int(((maxbyte - bytesused) * 8)/(period - timeused))
# Never cap below the slice's minimum rate.
328 if new_maxrate < (self.MinRate * 1000):
329 new_maxrate = self.MinRate * 1000
# NOTE(review): presumably the branch for "threshold not reached", restoring
# the uncapped max rate -- the else line is not visible; confirm.
331 new_maxrate = self.MaxRate * 1000
333 # Format template parameters for low bandwidth message
334 params['class'] = "low bandwidth"
335 params['bytes'] = format_bytes(usedbytes - self.bytes)
336 params['limit'] = format_bytes(self.MaxKByte * 1024)
337 params['thresh'] = format_bytes(self.ThreshKByte * 1024)
338 params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)
341 logger.log("bwmon: %(slice)s %(class)s " \
342 "%(bytes)s of %(limit)s max %(thresh)s thresh (%(new_maxrate)s/s maxrate)" % \
345 # Cap low bandwidth burst rate
346 if new_maxrate != runningmaxrate:
347 message += template % params
348 logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params)
# High bandwidth (i2) class: same calculation with the i2 counters and limits.
350 if usedi2bytes >= (self.i2bytes + (self.Threshi2KByte * 1024)):
351 maxi2byte = self.Maxi2KByte * 1024
352 i2bytesused = usedi2bytes - self.i2bytes
353 timeused = int(time.time() - self.time)
# NOTE(review): same zero-divisor concern as the low bandwidth calculation.
354 new_maxi2rate = int(((maxi2byte - i2bytesused) * 8)/(period - timeused))
355 if new_maxi2rate < (self.Mini2Rate * 1000):
356 new_maxi2rate = self.Mini2Rate * 1000
358 new_maxi2rate = self.Maxi2Rate * 1000
360 # Format template parameters for high bandwidth message
361 params['class'] = "high bandwidth"
362 params['bytes'] = format_bytes(usedi2bytes - self.i2bytes)
363 params['limit'] = format_bytes(self.Maxi2KByte * 1024)
364 params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxi2rate)
# NOTE(review): this log line, and the mail template used below, read
# %(new_maxrate)s, which still holds the low bandwidth rate; the i2 rate is
# stored under 'new_maxexemptrate' -- the reported rate looks wrong.
367 logger.log("bwmon: %(slice)s %(class)s " \
368 "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % params)
370 # Cap high bandwidth burst rate
371 if new_maxi2rate != runningmaxi2rate:
372 message += template % params
373 logger.log("bwmon: %(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params)
# Apply both rates to bwlimit when either differs from what is running.
376 if new_maxrate != runningmaxrate or new_maxi2rate != runningmaxi2rate:
377 bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxi2rate)
# Notify only when a cap message was built and no mail has been recorded as
# sent (self.emailed is False).
380 if message and self.emailed == False:
381 subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
383 logger.log("bwmon: "+ subject)
384 logger.log("bwmon: "+ message + (footer % params))
387 slicemail(self.name, subject, message + (footer % params))
397 default_ThreshKByte,\
399 default_Threshi2KByte,\
# Body of the main monitoring pass: load saved state, register new slices,
# reset or update each running slice, then save state.
# NOTE(review): the enclosing def line and a number of interior lines are
# not visible here; the comments cover the visible lines only.
406 # In case the limits have changed.
407 default_MaxRate = int(bwlimit.get_bwcap() / 1000)
408 default_Maxi2Rate = int(bwlimit.bwmax / 1000)
410 # In case default isn't set yet.
411 if default_MaxRate == -1:
412 default_MaxRate = 1000000
# Load the saved (version, slices) state.
# NOTE(review): pickle.load will execute code embedded in a tampered file;
# the data file is treated here as trusted local state. No close() of this
# handle appears in the visible lines.
415 f = open(datafile, "r+")
416 logger.log("bwmon: Loading %s" % datafile)
417 (version, slices) = pickle.load(f)
419 # Check version of data file
420 if version != "$Id: bwmon.py,v 1.18 2007/04/25 22:19:59 faiyaza Exp $":
421 logger.log("bwmon: Not using old version '%s' data file %s" % (version, datafile))
424 version = "$Id: bwmon.py,v 1.18 2007/04/25 22:19:59 faiyaza Exp $"
427 # Get/set special slice IDs
428 root_xid = bwlimit.get_xid("root")
429 default_xid = bwlimit.get_xid("default")
431 # Since root is required for sanity, it's not in the API/plc database, so pass {}
433 if root_xid not in slices.keys():
434 slices[root_xid] = Slice(root_xid, "root", {})
435 slices[root_xid].reset(0, 0, 0, 0, {})
437 # Used by bwlimit. Pass {} since there is no rspec (like above).
438 if default_xid not in slices.keys():
439 slices[default_xid] = Slice(default_xid, "default", {})
440 slices[default_xid].reset(0, 0, 0, 0, {})
443 # Get running slivers that should be on this node (from plc). {xid: name}
444 for sliver in db.keys():
445 live[bwlimit.get_xid(sliver)] = sliver
448 # live.xids - running(slices).xids = new.xids
450 for plcxid in live.keys():
451 if plcxid not in slices.keys():
452 newslicesxids.append(plcxid)
454 #newslicesxids = Set(live.keys()) - Set(slices.keys())
455 for newslicexid in newslicesxids:
456 # Delegated slices don't have xids (which are uids) since they haven't been
458 if newslicexid != None and db[live[newslicexid]].has_key('_rspec') == True:
459 logger.log("bwmon: New Slice %s" % live[newslicexid])
460 # _rspec is the computed rspec: NM retrieved data from PLC, computed loans
461 # and made a dict of computed values.
462 rspec = db[live[newslicexid]]['_rspec']
463 slices[newslicexid] = Slice(newslicexid, live[newslicexid], rspec)
464 slices[newslicexid].reset(0, 0, 0, 0, rspec)
466 logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslicexid])
468 # ...mlhuang's abortion....
469 # Get actual running values from tc.
470 # Update slice totals and bandwidth.
471 for params in bwlimit.get():
474 minexemptrate, maxexemptrate,
475 usedbytes, usedi2bytes) = params
477 # Ignore root and default buckets
478 if xid == root_xid or xid == default_xid:
481 name = bwlimit.get_slice(xid)
483 # Orphaned (not associated with a slice) class
487 # Monitor only the specified slices
488 if names and name not in names:
490 # slices is populated from the pickle file
491 # xid is populated from bwlimit (read from /etc/passwd)
492 if slices.has_key(xid):
494 # Old slices weren't being instantiated correctly because
495 # the HTBs were still present, but the slice in bwmon would
496 # have the byte counts set to 0. The next time update was run
497 # the real byte count would be sent to update, causing the bw cap.
499 if time.time() >= (slice.time + period) or \
500 usedbytes < slice.bytes or \
501 usedi2bytes < slice.i2bytes or \
502 xid in newslicesxids:
503 # Reset to defaults every 24 hours or if it appears
504 # that the byte counters have overflowed (or, more
505 # likely, the node was restarted or the HTB buckets
506 # were re-initialized).
507 slice.reset(maxrate, \
511 db[slice.name]['_rspec'])
514 slice.update(maxrate, \
518 db[slice.name]['_rspec'])
520 # Just in case. Probably (hopefully) this will never happen.
521 # New slice, initialize state
# NOTE(review): the comment above says "initialize state" but the visible
# log line reports a deletion -- one of the two looks stale; confirm.
522 logger.log("bwmon: Deleting orphaned slice xid %s" % xid)
# Slices held in saved state that are no longer live on this node.
526 dead = Set(slices.keys()) - Set(live.keys())
528 if xid == root_xid or xid == default_xid:
# Persist the state for the next run.
# NOTE(review): no close() of this handle appears in the visible lines.
533 logger.log("bwmon: Saving %s" % datafile)
534 f = open(datafile, "w")
535 pickle.dump((version, slices), f)
539 #def GetSlivers(data):
540 # for sliver in data['slivers']:
541 # if sliver.has_key('attributes'):
543 # for attribute in sliver['attributes']:
544 # if attribute['name'] == "KByteThresh": print attribute['value']
546 #def start(options, config):