3 # Swap monitoring daemon. Every 30 seconds, checks process memory
4 # usage. At 90% utilization, resets the slice that is consuming the
5 # most physical memory. At 95% utilization, reboots the machine.
8 # Mark Huang <mlhuang@cs.princeton.edu>
9 # Andy Bavier <acb@cs.princeton.edu>
10 # Copyright (C) 2004-2006 The Trustees of Princeton University
12 # $Id: BandwidthMonitor.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $
25 wrap = textwrap.TextWrapper()
27 # util-vserver/python/vserver.py allows us to control slices directly
29 from vserver import VServer
31 # bwlimit exports a few useful functions like run(), get_xid(), and get_slice()
40 datafile = "/var/lib/misc/swapmon.dat"
42 # Seconds between process analysis
45 # Minimum change in swap utilization over 30 seconds that will trigger
46 # early process analysis.
49 # Swap utilization at which the largest consumer of physical memory is reset
52 # Swap utilization at which the machine is rebooted
55 # Minimum physical memory utilization to be considered the largest consumer
58 # System slices that should not be reset (regexps)
59 system_slices = ['root', PLC_SLICE_PREFIX + '_']
61 # Message sent after a critical reboot
62 rebooted_subject = "pl_mom rebooted %(hostname)s"
65 Sometime before %(date)s, swap space was
66 nearly exhausted on %(hostname)s, so pl_mom rebooted it.
68 Slices active prior to reboot are listed below. Memory usage
69 statistics are not entirely accurate due to threading.
73 %(date)s %(hostname)s reboot
76 # Message sent after a hog is reset
77 reset_subject = "pl_mom reset slice %(slice)s on %(hostname)s"
80 Sometime before %(date)s, swap space was
81 nearly exhausted on %(hostname)s.
83 Slice %(slice)s was reset since it was the largest consumer of
84 physical memory at %(rss)s (%(percent)4.1f%%).
86 Please reply to this message explaining the nature of your experiment,
87 and what you are doing to address the problem.
89 %(slice)s processes prior to reset:
93 %(date)s %(hostname)s reset %(slice)s
96 # Message sent to system slices that should not be reset
97 alarm_subject = "pl_mom alarm slice %(slice)s on %(hostname)s"
100 Sometime before %(date)s, swap space was
101 nearly exhausted on %(hostname)s.
103 System slice %(slice)s was the largest consumer of physical memory at
104 %(rss)s (%(percent)4.1f%%). It was not reset, but please verify its
107 %(slice)s processes prior to alarm:
111 %(date)s %(hostname)s alarm %(slice)s
116 Usage: %s [OPTIONS]...
119 -d, --debug Enable debugging (default: %s)
120 -v, --verbose Increase verbosity level (default: %d)
121 -f, --file=FILE Data file (default: %s)
122 -s, --slice=SLICE Constrain monitoring to these slices (default: all)
123 -p, --period=SECONDS Seconds between normal process analysis (default: %s)
124 --reset-thresh=PERCENT Swap utilization at which slice reset is attempted
125 --reboot-thresh=PERCENT Swap utilization at which the machine is rebooted
126 --min-thresh=PERCENT Minimum physical memory utilization to be considered a hog
127 --system-slice=SLICE System slice that should not be reset
128 -h, --help This message
129 """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period))
def slicestat(names = None):
    """
    Get status of specified slices (if names is None or empty, all
    slices). vsize and rss are in KiB. Returns

    {xid: {'xid': slice_id,
           'name': slice_name,
           'procs': [{'pid': pid, 'xid': slice_id, 'user': username, 'cmd': command,
                      'vsize': virtual_kib, 'rss': physical_kib,
                      'pcpu': cpu_percent, 'pmem': mem_percent}],
           'vsize': total_virtual_kib,
           'rss': total_physical_kib}}

    The per-process rss reported here is larger than what ps prints:
    a fixed 12 KiB of per-process overhead plus an estimate of the
    page table overhead is added before the totals are accumulated.
    """

    # Mandatory fields. xid is a virtual field inserted by vps. Make
    # sure cmd is last so that it does not get truncated.
    fields = ['pid', 'xid', 'user', 'vsize', 'rss', 'pcpu', 'pmem', 'cmd']

    # vps inserts xid after pid in the output, but ps doesn't know
    # what the field means, so it must not be passed to -o.
    ps_fields = list(fields)
    ps_fields.remove('xid')

    slices = {}

    # Eat the header line. vps depends on the header to figure out
    # which column is the PID column, so we can't just tell ps not to
    # print it.
    for line in bwlimit.run("/usr/sbin/vps -e -o " + ",".join(ps_fields))[1:]:
        # Drop surrounding whitespace so that a trailing newline cannot
        # end up in the cmd field.
        line = line.strip()

        # Replace "0 MAIN" and "1 ALL_PROC" (the special monikers that
        # vps uses to denote the root context and the "all contexts"
        # context) with "0" so that we can just split() on whitespace.
        line = line.replace("0 MAIN", "0").replace("1 ALL_PROC", "0")

        # Represent process as a dict of fields. Limiting the number of
        # splits keeps a command line with spaces in one piece.
        values = line.split(None, len(fields) - 1)
        if len(values) != len(fields):
            # Malformed or truncated line
            continue
        proc = dict(zip(fields, values))

        # Convert ints and floats. cmd is left as text so that it can
        # always be searched with a regexp below.
        for field in fields[:-1]:
            value = proc[field]
            try:
                proc[field] = int(value)
            except ValueError:
                try:
                    proc[field] = float(value)
                except ValueError:
                    pass

        # Assign (pl_)sshd processes to slice instead of root
        # NOTE(review): the pattern does not match digits, so a slice
        # name containing digits is only matched up to the first digit
        # -- confirm against the slice naming rules.
        m = re.search(r"sshd: ([a-zA-Z_]+)", proc['cmd'])
        if m is not None:
            xid = bwlimit.get_xid(m.group(1))
            if xid is not None:
                proc['xid'] = xid

        name = bwlimit.get_slice(proc['xid'])
        if name is None:
            # Orphaned (not associated with a slice) class
            name = "%d?" % proc['xid']

        # Monitor only the specified slices
        if names and name not in names:
            continue

        # Additional overhead calculations from slicestat

        # Include 12 KiB of process overhead =
        # 4 KiB top-level page table +
        # 4 KiB kernel structure +
        # 4 KiB basic page table
        proc['rss'] += 12

        # Include additional page table overhead. Floor division keeps
        # rss an integer number of KiB regardless of division mode.
        if proc['vsize'] > 4096:
            proc['rss'] += 4 * ((proc['vsize'] - 1) // 4096)

        # Fetch the per-slice record, creating it on first sight
        if proc['xid'] in slices:
            entry = slices[proc['xid']]
        else:
            entry = {'xid': proc['xid'], 'name': name, 'procs': [], 'vsize': 0, 'rss': 0}

        entry['procs'].append(proc)
        entry['vsize'] += proc['vsize']
        entry['rss'] += proc['rss']

        slices[proc['xid']] = entry

    return slices
228 Returns total physical memory on the system in KiB.
231 meminfo = open("/proc/meminfo", "r")
232 line = meminfo.readline()
234 if line[0:8] == "MemTotal":
235 # MemTotal: 255396 kB
236 (name, value, kb) = line.split()
def swap_used():
    """
    Returns swap utilization on the system as a whole percentage (0-100).

    The size and used columns of every entry in /proc/swaps are summed.
    Returns 0 when /proc/swaps cannot be read or when no swap space is
    configured, instead of dividing by zero.
    """

    total_swap = 0
    total_used = 0

    try:
        swaps = open("/proc/swaps", "r")
        try:
            # Eat header line
            lines = swaps.readlines()[1:]
        finally:
            swaps.close()
    except IOError:
        lines = []

    for line in lines:
        # /dev/mapper/planetlab-swap partition 1048568 3740 -1
        columns = line.strip().split()
        if len(columns) != 5:
            # Not a swap entry in the expected five-column layout
            continue
        try:
            size = int(columns[2])
            used = int(columns[3])
        except ValueError:
            # Skip entries whose counters are not numeric
            continue
        total_swap += size
        total_used += used

    if total_swap <= 0:
        # No swap configured (or nothing readable): nothing is consumed
        return 0

    return 100 * total_used // total_swap
269 global debug, verbose, datafile
270 global period, change_thresh, reset_thresh, reboot_thresh, min_thresh, system_slices
# Long options. Every option tested in the option loop must be declared
# here, otherwise getopt rejects it before the loop ever sees it;
# "change-thresh=" was missing although --change-thresh is handled.
longopts = ["debug", "verbose", "file=", "slice=", "help"]
longopts += ["period=", "change-thresh=", "reset-thresh=", "reboot-thresh=", "min-thresh=", "system-slice="]
# -p takes an argument (SECONDS), so it needs a trailing colon just like
# -f and -s; without it "-p 60" is parsed as a bare flag.
(opts, argv) = getopt.getopt(sys.argv[1:], "dvf:s:p:h", longopts)
278 except getopt.GetoptError, err:
279 print "Error: " + err.msg
283 for (opt, optval) in opts:
284 if opt == "-d" or opt == "--debug":
286 elif opt == "-v" or opt == "--verbose":
288 elif opt == "-f" or opt == "--file":
290 elif opt == "-s" or opt == "--slice":
292 elif opt == "-p" or opt == "--period":
294 elif opt == "--change-thresh":
295 change_thresh = int(optval)
296 elif opt == "--reset-thresh":
297 reset_thresh = int(optval)
298 elif opt == "--reboot-thresh":
299 reboot_thresh = int(optval)
300 elif opt == "--min-thresh":
301 min_thresh = int(optval)
302 elif opt == "--system-slice":
303 system_slices.append(optval)
308 # Check if we are already running
315 # Redirect stdout and stderr to syslog
316 syslog.openlog("swapmon")
317 sys.stdout = sys.stderr = Logger()
319 # Get total physical memory
320 total_rss = memtotal()
323 f = open(datafile, "r+")
325 print "Loading %s" % datafile
326 (version, slices) = pickle.load(f)
328 # Check version of data file
329 if version != "$Id: bwmon.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $":
330 print "Not using old version '%s' data file %s" % (version, datafile)
333 # Send notification if we rebooted the node because of swap exhaustion
334 slicelist = slices.values()
335 slicelist.sort(lambda a, b: b['rss'] - a['rss'])
337 table = "%-20s%10s%24s\n\n" % ("Slice", "Processes", "Memory Usage")
338 for slice in slicelist:
339 table += "%-20s%10d%16s (%4.1f%%)\n" % \
340 (slice['name'], len(slice['procs']),
341 format_bytes(slice['rss'] * 1024, si = False),
342 100. * slice['rss'] / total_rss)
344 params = {'hostname': socket.gethostname(),
345 'date': time.asctime(time.gmtime()) + " GMT",
349 print rebooted_subject % params
350 print rebooted_body % params
352 slicemail(None, rebooted_subject % params, rebooted_body % params)
357 version = "$Id: bwmon.py,v 1.1 2006/04/25 14:40:28 mlhuang Exp $"
360 # Query process table every 30 seconds, or when a large change in
361 # swap utilization is detected.
366 # System slices that we have warned but could not reset
371 if last_used is None:
374 print "%d%% swap consumed" % used
376 if used >= reboot_thresh:
377 # Dump slice state before rebooting
379 print "Saving %s" % datafile
380 f = open(datafile, "w")
381 pickle.dump((version, slices), f)
384 # Goodbye, cruel world
385 print "%d%% swap consumed, rebooting" % used
387 bwlimit.run("/bin/sync; /sbin/reboot -f")
389 elif used >= reset_thresh:
391 slicelist = slices.values()
392 slicelist.sort(lambda a, b: b['rss'] - a['rss'])
393 for slice in slicelist:
394 percent = 100. * slice['rss'] / total_rss
395 if percent < min_thresh:
398 print "%d%% swap consumed, slice %s is using %s (%d%%) of memory" % \
401 format_bytes(slice['rss'] * 1024, si = False),
404 slice['procs'].sort(lambda a, b: b['rss'] - a['rss'])
406 table = "%5s %10s %10s %4s %4s %s\n\n" % ("PID", "VIRT", "RES", '%CPU', '%MEM', 'COMMAND')
407 for proc in slice['procs']:
408 table += "%5s %10s %10s %4.1f %4.1f %s\n" % \
410 format_bytes(proc['vsize'] * 1024, si = False),
411 format_bytes(proc['rss'] * 1024, si = False),
412 proc['pcpu'], proc['pmem'], proc['cmd'])
414 params = {'hostname': socket.gethostname(),
415 'date': time.asctime(time.gmtime()) + " GMT",
417 'slice': slice['name'],
418 'rss': format_bytes(slice['rss'] * 1024, si = False),
421 # Match slice name against system slice patterns
422 is_system_slice = filter(None, [re.match(pattern, slice['name']) for pattern in system_slices])
425 # Do not reset system slices, just warn once
426 if slice['name'] not in warned:
427 warned.append(slice['name'])
429 print alarm_subject % params
430 print alarm_body % params
432 print "Warning slice " + slice['name']
433 slicemail(slice['name'], alarm_subject % params, alarm_body % params)
437 print reset_subject % params
438 print reset_body % params
443 print "Resetting slice " + slice['name']
444 vserver = VServer(slice['name'])
446 vserver.start(wait = False)
450 except Exception, err:
451 print "Warning: Exception received while resetting slice %s:" % slice['name'], err
452 slicemail(slice['name'], reset_subject % params, reset_body % params)
455 elif timer <= 0 or used >= (last_used + change_thresh):
456 if used >= (last_used + change_thresh):
457 print "%d%% swap consumed, %d%% in last %d seconds" % \
458 (used, used - last_used, period - timer)
460 slices = slicestat(names)
463 # Keep track of large changes in swap utilization
471 if __name__ == '__main__':