X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=swapmon.py;h=3f6304ec8109eab639db5bd1bf040e7d2d9e69b2;hb=06d48839b352ccb1a9b81e549a215c90c264a8fa;hp=7373d0820fc2b89390af426234c5845886c1d26c;hpb=26b391f03d73824688e27dacce8193b253e78873;p=mom.git diff --git a/swapmon.py b/swapmon.py index 7373d08..3f6304e 100755 --- a/swapmon.py +++ b/swapmon.py @@ -9,7 +9,7 @@ # Andy Bavier # Copyright (C) 2004-2006 The Trustees of Princeton University # -# $Id: swapmon.py,v 1.1 2006/04/28 19:26:59 mlhuang Exp $ +# $Id: swapmon.py,v 1.4 2006/05/02 17:23:14 mlhuang Exp $ # import syslog @@ -122,6 +122,7 @@ Options: --reboot-thresh=PERCENT Swap utilization at which the machine is rebooted --min-thresh=PERCENT Minimum physical memory utilization to be considered a hog --system-slice=SLICE System slice that should not be reset + --status Print memory usage statistics and exit -h, --help This message """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period)) @@ -179,6 +180,12 @@ def slicestat(names = None): except ValueError: pass + # vps sometimes prints ERR instead of a context ID if it + # cannot identify the context of an orphaned (usually dying) + # process. Skip these processes. + if type(proc['xid']) != int: + continue + # Assign (pl_)sshd processes to slice instead of root m = re.search(r"sshd: ([a-zA-Z_]+)", proc['cmd']) if m is not None: @@ -261,6 +268,22 @@ def swap_used(): return 100 * total_used / total_swap +def summary(names = None, total_rss = memtotal()): + """ + Return a summary of memory usage by slice. + """ + slicelist = slicestat(names).values() + slicelist.sort(lambda a, b: b['rss'] - a['rss']) + + table = "%-20s%10s%24s\n\n" % ("Slice", "Processes", "Memory Usage") + for slice in slicelist: + table += "%-20s%10d%16s (%4.1f%%)\n" % \ + (slice['name'], len(slice['procs']), + format_bytes(slice['rss'] * 1024, si = False), + 100. * slice['rss'] / total_rss) + + return table + def main(): # Defaults global debug, verbose, datafile @@ -269,7 +292,7 @@ def main(): names = [] try: - longopts = ["debug", "verbose", "file=", "slice=", "help"] + longopts = ["debug", "verbose", "file=", "slice=", "status", "help"] longopts += ["period=", "reset-thresh=", "reboot-thresh=", "min-thresh=", "system-slice="] (opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:ph", longopts) except getopt.GetoptError, err: @@ -298,6 +321,9 @@ def main(): min_thresh = int(optval) elif opt == "--system-slice": system_slices.append(optval) + elif opt == "--status": + print summary(names) + sys.exit(0) else: usage() sys.exit(0) @@ -323,24 +349,13 @@ def main(): (version, slices) = pickle.load(f) f.close() # Check version of data file - if version != "$Id: swapmon.py,v 1.1 2006/04/28 19:26:59 mlhuang Exp $": + if version != "$Id: swapmon.py,v 1.4 2006/05/02 17:23:14 mlhuang Exp $": print "Not using old version '%s' data file %s" % (version, datafile) raise Exception - # Send notification if we rebooted the node because of swap exhaustion - slicelist = slices.values() - slicelist.sort(lambda a, b: b['rss'] - a['rss']) - - table = "%-20s%10s%24s\n\n" % ("Slice", "Processes", "Memory Usage") - for slice in slicelist: - table += "%-20s%10d%16s (%4.1f%%)\n" % \ - (slice['name'], len(slice['procs']), - format_bytes(slice['rss'] * 1024, si = False), - 100. * slice['rss'] / total_rss) - params = {'hostname': socket.gethostname(), 'date': time.asctime(time.gmtime()) + " GMT", - 'table': table} + 'table': summary(total_rss)} if debug: print rebooted_subject % params @@ -351,7 +366,7 @@ def main(): # Delete data file os.unlink(datafile) except Exception: - version = "$Id: swapmon.py,v 1.1 2006/04/28 19:26:59 mlhuang Exp $" + version = "$Id: swapmon.py,v 1.4 2006/05/02 17:23:14 mlhuang Exp $" slices = {} # Query process table every 30 seconds, or when a large change in