X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=swapmon.py;h=3f6304ec8109eab639db5bd1bf040e7d2d9e69b2;hb=36d48e6b65c5b4514e417f425aee54685989a605;hp=7d579e4766fbd119724d161e14648ad9916f0b81;hpb=42dbe689a93025034f3e798174e54f3e2a0d3371;p=mom.git

diff --git a/swapmon.py b/swapmon.py
index 7d579e4..3f6304e 100755
--- a/swapmon.py
+++ b/swapmon.py
@@ -9,7 +9,7 @@
 # Andy Bavier
 # Copyright (C) 2004-2006 The Trustees of Princeton University
 #
-# $Id: BandwidthMonitor.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $
+# $Id: swapmon.py,v 1.4 2006/05/02 17:23:14 mlhuang Exp $
 #
 
 import syslog
@@ -21,9 +21,6 @@ import pickle
 import socket
 import time
 
-import textwrap
-wrap = textwrap.TextWrapper()
-
 # util-vserver/python/vserver.py allows us to control slices directly
 # from Python
 from vserver import VServer
@@ -125,6 +122,7 @@ Options:
         --reboot-thresh=PERCENT Swap utilization at which the machine is rebooted
         --min-thresh=PERCENT    Minimum physical memory utilization to be considered a hog
         --system-slice=SLICE    System slice that should not be reset
+        --status                Print memory usage statistics and exit
         -h, --help              This message
 """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period))
 
@@ -182,6 +180,12 @@ def slicestat(names = None):
             except ValueError:
                 pass
 
+        # vps sometimes prints ERR instead of a context ID if it
+        # cannot identify the context of an orphaned (usually dying)
+        # process. Skip these processes.
+        if type(proc['xid']) != int:
+            continue
+
         # Assign (pl_)sshd processes to slice instead of root
         m = re.search(r"sshd: ([a-zA-Z_]+)", proc['cmd'])
         if m is not None:
@@ -264,6 +268,22 @@ def swap_used():
 
     return 100 * total_used / total_swap
 
+def summary(names = None, total_rss = memtotal()):
+    """
+    Return a summary of memory usage by slice.
+    """
+    slicelist = slicestat(names).values()
+    slicelist.sort(lambda a, b: b['rss'] - a['rss'])
+
+    table = "%-20s%10s%24s\n\n" % ("Slice", "Processes", "Memory Usage")
+    for slice in slicelist:
+        table += "%-20s%10d%16s (%4.1f%%)\n" % \
+                 (slice['name'], len(slice['procs']),
+                  format_bytes(slice['rss'] * 1024, si = False),
+                  100. * slice['rss'] / total_rss)
+
+    return table
+
 def main():
     # Defaults
     global debug, verbose, datafile
@@ -272,7 +292,7 @@ def main():
     names = []
 
     try:
-        longopts = ["debug", "verbose", "file=", "slice=", "help"]
+        longopts = ["debug", "verbose", "file=", "slice=", "status", "help"]
         longopts += ["period=", "reset-thresh=", "reboot-thresh=", "min-thresh=", "system-slice="]
         (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:ph", longopts)
     except getopt.GetoptError, err:
@@ -301,6 +321,9 @@ def main():
             min_thresh = int(optval)
         elif opt == "--system-slice":
             system_slices.append(optval)
+        elif opt == "--status":
+            print summary(names)
+            sys.exit(0)
         else:
             usage()
             sys.exit(0)
@@ -326,24 +349,13 @@ def main():
         (version, slices) = pickle.load(f)
         f.close()
         # Check version of data file
-        if version != "$Id: bwmon.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $":
+        if version != "$Id: swapmon.py,v 1.4 2006/05/02 17:23:14 mlhuang Exp $":
             print "Not using old version '%s' data file %s" % (version, datafile)
             raise Exception
 
-        # Send notification if we rebooted the node because of swap exhaustion
-        slicelist = slices.values()
-        slicelist.sort(lambda a, b: b['rss'] - a['rss'])
-
-        table = "%-20s%10s%24s\n\n" % ("Slice", "Processes", "Memory Usage")
-        for slice in slicelist:
-            table += "%-20s%10d%16s (%4.1f%%)\n" % \
-                     (slice['name'], len(slice['procs']),
-                      format_bytes(slice['rss'] * 1024, si = False),
-                      100. * slice['rss'] / total_rss)
-
         params = {'hostname': socket.gethostname(),
                   'date': time.asctime(time.gmtime()) + " GMT",
-                  'table': table}
+                  'table': summary(total_rss)}
 
         if debug:
             print rebooted_subject % params
@@ -354,7 +366,7 @@ def main():
         # Delete data file
         os.unlink(datafile)
     except Exception:
-        version = "$Id: bwmon.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $"
+        version = "$Id: swapmon.py,v 1.4 2006/05/02 17:23:14 mlhuang Exp $"
         slices = {}
 
     # Query process table every 30 seconds, or when a large change in