--- /dev/null
+#!/usr/bin/perl -w
+
+use POSIX qw(setsid);
+use Sys::Syslog;
+use Sys::Hostname;
+#use LWP::Simple;
+
+$debug = 0;
+$proc = "pl_mom";
+$alias_addr = "pl-mom\@planet-lab.org";
+$from_addr = "support\@planet-lab.org";
+
+if (! $debug) {
+ $kill_thresh = 90;
+ $reboot_thresh = 95;
+ $log_thresh = 85;
+ $change_thresh = 5;
+ $min_thresh = 10;
+ #$bwcap_default = "off";
+ $bwcap_default = "1.5Mbit";
+ $cutoff_default = "16200000000"; # 16GB, for 1.5Mbit cap
+ $bwmon_sleep = 900;
+
+ $sendmail = "/usr/sbin/sendmail -t -f$from_addr";
+ $vservers = "/etc/vservers";
+ $pidfile = "/var/run/$proc.pid";
+ $rebootfile = "/var/lib/misc/pl_mom.reboot";
+ $daily_log = "/var/lib/misc/pl_mom.daily";
+ $daily_stamp = "/var/lib/misc/pl_mom.stamp";
+ $configfile = "/etc/planetlab/pl_mom.conf";
+ $capfile = "/var/lib/misc/pl_mom.oldcaps";
+} else {
+ $kill_thresh = 2;
+ $reboot_thresh = 20;
+ $log_thresh = 2;
+ $change_thresh = 5;
+ $min_thresh = 2;
+ $bwcap_default = "1Kbit";
+ $cutoff_default = "10800";
+ $bwmon_sleep = 10;
+
+ $sendmail = "cat";
+ $vservers = "./debug";
+ $pidfile = "./$proc.pid";
+ $rebootfile = "./debug/pl_mom.reboot";
+ $daily_log = "./debug/pl_mom.daily";
+ $daily_stamp = "./debug/pl_mom.stamp";
+ $configfile = "./debug/pl_mom.conf";
+ $capfile = "./debug/pl_mom.oldcaps";
+}
+
+$sleep = 30;
+
+# daemonize the program
+if (! $debug) {
+ &daemonize;
+}
+
+system("echo $$ > $pidfile");
+
+read_config_file();
+
+# Check to see whether pl_mom rebooted the node
+if (-e $rebootfile) {
+ unlink($rebootfile);
+ syslog ("warning", "pl_mom: Sending shutdown mail");
+ shutdown_mail();
+}
+
+my $pid = fork();
+if (! $pid) {
+ syslog ("info", "pl_mom: Launching reboot kicker");
+ reboot_kicker();
+ die (0);
+}
+$pid = fork();
+if (! $pid) {
+ syslog ("info", "pl_mom: Launching bandwidth monitor");
+ if ($bwcap_default =~ /off/) {
+ syslog("info", "pl_mom: Max rate unlimited by default");
+ }
+ reset_bandwidth_caps();
+ bandwidth_monitor();
+ die (0);
+}
+
+while (1) {
+ $used = int(swap_used());
+
+ if (defined($old_used)) {
+ if ($used >= $old_used + $change_thresh) {
+ syslog ("info", "pl_mom: %d%% swap consumed in last %d seconds",
+ $used - $old_used, $sleep);
+ }
+ }
+
+ if ($used >= $log_thresh) {
+ if (! defined($old_used) || $used != $old_used) {
+ syslog ("info", "pl_mom: Swap used: %d%%", $used);
+ }
+ get_slice_info();
+ my $hog = memory_hog();
+ if ($hog) {
+ if ($used < $kill_thresh) {
+ if (! defined($Warning{$hog})) {
+ $Warning{$hog} = "sent";
+ syslog ("warning", "pl_mom: Slice $hog is ".
+ "using $Slice{$hog}{mem_pct}%% of memory");
+ #slice_warning_mail($hog);
+ }
+ } else {
+ my $id = `id -u $hog`;
+ chomp($id);
+ my $top = `/usr/sbin/chcontext --ctx $id /usr/bin/top -b -n 1`;
+ syslog ("warning", "pl_mom: Resetting slice $hog");
+ if (! $debug) {
+ slice_reset($hog);
+ }
+ syslog ("warning", "pl_mom: Sending mail to slice $hog");
+ slice_reset_mail($hog, $top);
+ }
+ }
+ }
+
+ sleep ($sleep);
+
+ $old_used = $used;
+}
+
+sub reboot_kicker {
+ while (1) {
+ $used = swap_used();
+
+ if ($used >= $reboot_thresh) {
+ syslog ("warning", "pl_mom: Rebooting node");
+
+ system("touch $rebootfile");
+ if (! $debug) {
+ #system("shutdown -r now");
+ system("/bin/sync; /sbin/reboot -f");
+ }
+ die (0);
+ }
+
+ sleep (1);
+ }
+}
+
+sub bandwidth_monitor {
+ while (1) {
+ # See if a new day has started for bandwidth monitoring
+ chomp($now = `date -u +%D`);
+ if (-e $daily_stamp) {
+ chomp($stamp = `cat $daily_stamp`);
+ }
+ if (! defined($stamp) || !($stamp =~ $now)) {
+ open (STAMP, ">$daily_stamp") ||
+ die "Can't open file $daily_stamp for writing: $!\n";
+ print STAMP "$now\n";
+ close STAMP;
+ unlink ($daily_log);
+
+ # Could save the list of capped slices in a file in order to
+ # avoid re-sending mails if the daemon restarts.
+ # Also may want a list of slices that are exempt from capping.
+ if (defined(%Start)) { undef %Start; }
+ if (defined(%Now)) { undef %Now; }
+ if (defined(%Cap)) { undef %Cap; }
+
+ reset_bandwidth_caps();
+
+ syslog("info", "pl_mom: Beginning bandwidth monitoring for $now");
+ }
+
+ get_slice_names();
+ get_baseline_counts();
+ get_slice_limits();
+
+ foreach $slice ( sort (keys %Start) ) {
+ if (defined $Now{$slice}) {
+ $today = $Now{$slice} - $Start{$slice};
+ if (! (defined ($Cutoff{$slice})||$bwcap_default =~ /off/)) {
+ $Cutoff{$slice} = $cutoff_default;
+ $Maxrate{$slice} = $bwcap_default;
+ }
+ if ($debug) {
+ if ($today) {
+ $cutoff = defined($Cutoff{$slice})
+ ? $Cutoff{$slice} : "<none>";
+ print "Slice $slice sent $today bytes; ".
+ "cutoff $cutoff\n";
+ }
+ }
+ if (defined ($Cutoff{$slice}) &&
+ $today >= $Cutoff{$slice} &&
+ ! defined($Cap{$slice})) {
+ $Cap{$slice} = "sent";
+ bw_cap_mail($slice);
+ log_bandwidth_cap($slice, $Maxrate{$slice});
+ cap_bandwidth($slice, $Maxrate{$slice});
+ }
+ } else {
+ # Token bucket for this slice is gone!
+ }
+ }
+
+ sleep($bwmon_sleep);
+ }
+}
+
+sub read_config_file {
+ if (-e $configfile) {
+ open (CONFIG, "<$configfile") ||
+ print "Cannot open $configfile; $!\n";
+ while (<CONFIG>) {
+ if (m/^(.*)=(.*)$/) {
+ ${$1} = $2;
+ if ($debug) {
+ print "read_config_file: $1 = ${$1}\n";
+ }
+ }
+ }
+ close CONFIG;
+ }
+}
+
+sub get_slice_names {
+ # Read slice names from /etc/passwd
+ if (defined (%Name)) { undef %Name; }
+ open (PASSWD, "</etc/passwd") ||
+ print "Cannot open /etc/passwd; $!\n";
+ while (<PASSWD>) {
+ my ($slicename, $passwd, $sliceid) = split(/:/);
+ $Name{$sliceid} = $slicename;
+ }
+ close PASSWD;
+}
+
+sub get_baseline_counts {
+ `touch $daily_log`;
+ open (BASE, "+<$daily_log") ||
+ print "Cannot open $daily_log; $!\n";
+ while (<BASE>) {
+ my ($slice, $bytecount) = split(/ /);
+ $Start{$slice} = $bytecount;
+ }
+
+ my $status = `tc -s -d qdisc show`;
+ my $sliceid = 0xffff;
+ @Lines = split(/\n/, $status);
+ foreach $line ( @Lines ) {
+ if ($line =~ /qdisc pfifo (.*): dev/) {
+ $sliceid = hex($1);
+ # "Capped" buckets all begin with 0x1000. Ignore the root
+ # (0x1000) and default (0x1fff) buckets, as well as
+ # "exempt" buckets that begin with 0x2000 (or anything
+ # other than 0x1000).
+ if (($sliceid & 0xf000) == 0x1000 &&
+ $sliceid != 0x1000 && $sliceid != 0x1fff) {
+ $sliceid = $sliceid & 0x0fff;
+ } else {
+ $sliceid = 0xffff;
+ }
+ } else {
+ if ($line =~ /Sent (.*) bytes/) {
+ my $bytes = $1;
+ if ($sliceid != 0xffff) {
+ my $slice = $Name{$sliceid};
+ if ($debug && $bytes) {
+ print "Slice: $slice ($sliceid), bytes $bytes\n";
+ }
+ if (! defined($Start{$slice})) {
+ print BASE "$slice $bytes\n";
+ $Start{$slice} = $bytes;
+ }
+ $Now{$slice} = $bytes;
+ }
+ }
+ }
+ }
+ close (BASE);
+}
+
+sub get_slice_limits {
+ if (defined %Maxrate) { undef %Maxrate; }
+ if (defined %Cutoff) { undef %Cutoff; }
+ if (-e $vservers) {
+ my $result = `grep -H "^BWAVGRATE" $vservers/*.conf`;
+ chomp ($result);
+ my @Lines = split(/\n/,$result);
+ foreach $line ( @Lines ) {
+ if ($line =~ /\/([^\/]*).conf:BWAVGRATE=(.*)[Mm]bit/) {
+ $slice = $1;
+ $limit = $2."Mbit";
+ $cutoff = ($2 * 1000000 * 86400)/8;
+ } else {
+ if ($line =~ /\/([^\/]*).conf:BWAVGRATE=(.*)[Kk]bit/) {
+ $slice = $1;
+ $limit = $2."Kbit";
+ $cutoff = ($2 * 1000 * 86400)/8;
+ } else {
+ die "Could not parse line $line";
+ }
+ }
+ $Maxrate{$slice} = $limit;
+ $Cutoff{$slice} = $cutoff;
+ }
+ }
+}
+
+sub reset_bandwidth_caps {
+ if (-e $capfile) {
+ open(CAP, "<$capfile") or die "Cannot open $capfile: $!";
+ while (<CAP>) {
+ chomp();
+ ($slicename, $oldcap) = split(/ /);
+ syslog("info", "pl_mom: Restoring bandwidth cap of $oldcap ".
+ "to $slicename");
+ cap_bandwidth ($slicename, $oldcap);
+ }
+ close CAP;
+ unlink($capfile);
+ }
+}
+
+sub log_bandwidth_cap {
+ ($slicename, $cap) = @_;
+ syslog("warning", "pl_mom: Capping bandwidth of slice ".
+ "$slicename at $cap until midnight GMT.");
+ # Save current cap to $capfile
+ system("echo $slicename `bwlimit getcap $slicename` >> $capfile");
+}
+
+sub send_mail {
+ # Arg 0: recipient addresses, comma-separated string
+ # Arg 1: subject line
+ # Arg 2: body of message
+ my $to = "To: $_[0]\n";
+ my $from = "From: $from_addr\n";
+ my $subject = "Subject: $_[1]\n";
+ my $msg = $_[2];
+
+ if ($debug) {
+ print $to;
+ print $subject;
+ } else {
+ open(SENDMAIL, "|$sendmail") or die "Cannot open $sendmail: $!";
+ print SENDMAIL $to;
+ print SENDMAIL $from;
+ print SENDMAIL $subject;
+ print SENDMAIL "Content-type: text/plain\n\n";
+ print SENDMAIL $msg;
+ close(SENDMAIL);
+ }
+}
+
+sub cap_bandwidth {
+ ($slicename, $cap) = @_;
+ system("bwlimit setcap $slicename $cap");
+ system("bwlimit on $slicename");
+}
+
+sub get_date {
+ my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdat)
+ = localtime(time);
+ my $date = sprintf("%4d-%02d-%02d %02d:%02d:%02d",
+ $year+1900, $mon+1, $mday, $hour, $min, $sec);
+ return $date;
+}
+
+sub bw_cap_mail {
+ my ($slicename) = @_;
+ my $hostname = hostname();
+ my $date = get_date();
+ my $sent = int($Cutoff{$slicename}/(1024*1024));
+ my $bwcap = $Maxrate{$slicename};
+
+ send_mail("$alias_addr, $slicename\@slices.planet-lab.org",
+ "$proc capped bandwidth of slice $slicename on $hostname",
+ "Slice $slicename has transmitted more than ${sent}MB today".
+ " on $hostname. ".
+ "Its bandwidth will be capped at $bwcap until midnight GMT.".
+ "\n\n$date $hostname bwcap $slicename\n");
+}
+
+sub shutdown_mail {
+ my $hostname = hostname();
+ my $date = get_date();
+ send_mail($alias_addr,
+ "$proc rebooted $hostname",
+ "Swap space was exhausted on $hostname and so $proc rebooted ".
+ "it.\n\nAs of $date, the node has successfully come back ".
+ "online.\n\n$date $hostname reboot\n");
+}
+
+sub slice_reset_mail {
+ my $hog = $_[0];
+ my $top = $_[1];
+ my $hog_mem = sprintf ("%0.f", $Slice{$hog}{pmem}/1000);
+ my $hog_pct = $Slice{$hog}{mem_pct};
+ my $hostname = hostname();
+ my $date = get_date();
+ send_mail("$alias_addr, $hog\@slices.planet-lab.org",
+ "$proc reset slice $hog on $hostname",
+ "As of $date, swap space is nearly exhausted on $hostname.\n\n".
+ "Slice $hog is being reset since it is the largest consumer ".
+ "of physical memory at ${hog_mem}MB ($hog_pct%).\n\n".
+ "Please reply to this message explaining the nature of your ".
+ "experiment, and what you are doing to address the problem.\n".
+ "\nOutput of 'top -b -n 1' in your slice prior to reset:\n".
+ "$top\n\n$date $hostname reset $hog\n");
+}
+
+sub slice_warning_mail {
+ my $hog = $_[0];
+ my $hog_mem = sprintf ("%0.f", $Slice{$hog}{pmem}/1000);
+ my $hog_pct = $Slice{$hog}{mem_pct};
+ my $hostname = hostname();
+ my $date = get_date();
+
+ if ($hog =~ /^root$/) {
+ $to = $alias_addr;
+ } else {
+ $to = "$alias_addr, $hog\@slices.planet-lab.org";
+ }
+
+ send_mail($to,
+ "$proc may reset slice $hog on $hostname",
+ "As of $date, swap space is over $log_thresh% full on ".
+ "$hostname.\n\nSlice $hog is the largest consumer ".
+ "of physical memory at ${hog_mem}MB ($hog_pct%).\n".
+ "Please check the memory usage of your slice to avoid a ".
+ "reset.\n\n$date $hostname warning $hog\n");
+}
+
+sub unkillable_alarm_mail {
+ my $hog = $_[0];
+ my $hog_mem = sprintf ("%0.f", $Slice{$hog}{pmem}/1000);
+ my $hog_pct = $Slice{$hog}{mem_pct};
+ my $hostname = hostname();
+ my $date = get_date();
+
+ if ($hog =~ /^root$/) {
+ $to = $alias_addr;
+ } else {
+ $to = "$alias_addr, $hog\@slices.planet-lab.org";
+ }
+
+ send_mail($to,
+ "$proc: alarm for slice $hog on $hostname",
+ "As of $date, swap space is over $log_thresh% full on ".
+ "$hostname.\n\nSlice $hog is the largest consumer ".
+ "of physical memory at ${hog_mem}MB ($hog_pct%).\n".
+ "The slice will not be reset, but please verify its behavior.\n".
+ "\n$date $hostname alarm $hog\n");
+}
+
+sub slice_reset {
+ my $slice = $_[0];
+ my $sliceid = $Slice{$slice}{ctx};
+ system("chcontext --ctx $sliceid sudo kill -9 -1");
+ system("/etc/init.d/vserver-init start $slice");
+}
+
+sub swap_used {
+ open (SWAP, "</proc/swaps") ||
+ die "Cannot open /proc/swaps; $!\n";
+
+ $line = <SWAP>;
+ $line = <SWAP>;
+ $line =~ s/[\t ]+/ /g;
+
+ my ($filename, $type, $size, $used, $priority) = split(/ /, $line);
+ close SWAP;
+
+ return 100*($used/$size);
+}
+
+sub get_slice_info {
+ if (! $debug) {
+ $content = `curl -s http://127.0.0.1:3100/slicestat`;
+ } else {
+ #$content = `cat ../pl_mom-deploy/slicestat`
+ $content = `curl -s http://127.0.0.1:3100/slicestat`;
+ }
+ my @lines = split(/\n/, $content);
+ %Slice = ();
+ foreach $line (@lines) {
+ my ($slice, $ctx, $cpu_pct, $mem_pct, $pmem, $vmem, $ntasks)
+ = split(/,/,$line);
+ $Slice{$slice}{ctx} = $ctx;
+ $Slice{$slice}{cpu_pct} = $cpu_pct;
+ $Slice{$slice}{mem_pct} = $mem_pct;
+ $Slice{$slice}{pmem} = $pmem;
+ $Slice{$slice}{vmem} = $vmem;
+ $Slice{$slice}{ntasks} = $ntasks;
+ }
+}
+
+sub memory_hog {
+ @keys = sort { $Slice{$b}{mem_pct} <=> $Slice{$a}{mem_pct} } (keys %Slice);
+ foreach $key (@keys) {
+ if ($Slice{$key}{mem_pct} >= $min_thresh) {
+ if ($key =~ /^root$/ || $key =~ /slicestat/ || $key =~ /netflow/) {
+ if (! defined ($Warning{$key})) {
+ $Warning{$key} = "sent";
+ syslog ("warning", "pl_mom: Sending alarm mail to ".
+ "unkillable slice $key, using ".
+ "$Slice{$key}{mem_pct}%% of memory");
+ unkillable_alarm_mail($key);
+ }
+ } else {
+ return $key;
+ }
+ } else {
+ #syslog ("info", "pl_mom: No killable slice using > ".
+ # "$min_thresh%% memory");
+ return;
+ }
+ }
+}
+
+sub daemonize {
+ chdir '/' or die "Can't chdir to /: $!";
+ open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
+ open STDOUT, '>>/dev/null' or die "Can't write to /dev/null: $!";
+ open STDERR, '>>/dev/null' or die "Can't write to /dev/null: $!";
+ defined(my $pid = fork) or die "Can't fork: $!";
+ exit if $pid;
+ setsid or die "Can't start a new session: $!";
+ umask 0;
+}
%define name pl_mom
-%define version 0.4
-%define release 1%{?pldistro:.%{pldistro}}%{?date:.%{date}}
+%define version 0.3
+%define release 15%{?pldistro:.%{pldistro}}%{?date:.%{date}}
-Summary: PlanetLab node monitoring tools
+Summary: PlanetLab mom -- Cleans up your mess
Name: %{name}
Version: %{version}
Release: %{release}
-License: GPL
+License: dontknow
Group: System Environment/Kernel
Source: %{name}-%{version}.tgz
Vendor: PlanetLab
URL: http://cvs.planet-lab.org/cvs/pl_mom
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
Requires: expect
-# swapmon requires vps and bwlimit.py
-Requires: util-vserver, util-vserver-python
-# bwmon requires tc
-Requires: iproute
%description
-pl_mom is a suite of PlanetLab node monitoring tools.
-swapmon is a swap monitoring daemon. Every 30 seconds, it checks
-process memory usage. At 90% utilization, resets the slice that is
-consuming the most physical memory. At 95% utilization, it reboots the
-machine to avoid a crash.
+A small daemon that watches the consumed swap space. At 90% utilization, it
+resets the slice that is the biggest memory hog. At 95% utilization, it
+reboots the machine.
-bwmon is a cron job that monitors the average bandwidth usage of each
-slice and enforces a daily byte limit for each slice.
-
-pl_mop is a cron job that "fixes" various common problems with nodes
-(dead services, ext3 corruption, zombie SSH sessions, etc.).
+A cron job which "fixes" various common problems with nodes (dead
+services, ext3 corruption, zombie SSH sessions) is also installed.
%prep
-%setup -q
+
+%setup
%build
%install
-rm -rf $RPM_BUILD_ROOT
+mkdir -p $RPM_BUILD_ROOT/usr/local/planetlab/bin/
+mkdir -p $RPM_BUILD_ROOT/etc/init.d/
-# Utility functions
-install -D -m 644 pl_mom.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/pl_mom.py
+cp pl_mom $RPM_BUILD_ROOT/etc/init.d/
+cp pl_mom.pl $RPM_BUILD_ROOT/usr/local/planetlab/bin/
+cp pl_mop.sh $RPM_BUILD_ROOT/usr/local/planetlab/bin/
-# Bandwidth monitor (bwmon), run periodically
-install -D -m 755 bwmon.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/bwmon.py
+install -D -m 644 pl_mop.cron $RPM_BUILD_ROOT/etc/cron.d/pl_mop
-# Swap monitoring daemon (swapmon)
-install -D -m 755 swapmon.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/swapmon.py
-install -D -m 755 swapmon.init $RPM_BUILD_ROOT/%{_initrddir}/swapmon
+%clean
+[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT
-# Cleanup script
-install -D -m 755 pl_mop.sh $RPM_BUILD_ROOT/usr/local/planetlab/bin/pl_mop.sh
+%files
+%defattr(0755, root, root)
+/etc/init.d/pl_mom
+/usr/local/planetlab/bin/pl_mom.pl
+/usr/local/planetlab/bin/pl_mop.sh
+/etc/cron.d/pl_mop
-# Runs pl_mop and bwmon periodically
-install -D -m 644 pl_mom.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/pl_mom
+%pre
-%clean
-rm -rf $RPM_BUILD_ROOT
%post
-chkconfig --add swapmon
-chkconfig swapmon on
-if [ "$PL_BOOTCD" != "1" ] ; then
- service swapmon restart
-fi
+if [ "$1" -ge 1 ]; then
-# Randomize pl_mop run time
-M=$((60 * $RANDOM / 32768))
-H=$((24 * $RANDOM / 32768))
-sed -i -e "s/@M@/$M/" -e "s/@H@/$H/" %{_sysconfdir}/cron.d/pl_mom
+ chkconfig --add pl_mom
+ chkconfig --level 3 pl_mom on
-exit 0
+ if [[ "$PL_BOOTCD" != "1" ]]; then
+ /etc/init.d/pl_mom stop
+ /etc/init.d/pl_mom start
+ fi
+
+ # Randomize pl_mop run time
+ M=$((60 * $RANDOM / 32768))
+ H=$((24 * $RANDOM / 32768))
+ sed -i -e "s/@M@/$M/" -e "s/@H@/$H/" /etc/cron.d/pl_mop
+fi
%preun
-# 0 = erase, 1 = upgrade
if [ "$1" -eq 0 ]; then
- if [ "$PL_BOOTCD" != "1" ] ; then
- service swapmon stop
- fi
- chkconfig swapmon off
- chkconfig --del swapmon
+ if [[ "$PL_BOOTCD" != "1" ]]; then
+ /etc/init.d/pl_mom stop
+ fi
+
+ chkconfig --del pl_mom
+ chkconfig pl_mom off
fi
-exit 0
-%files
-%defattr(-, root, root, -)
-%{_datadir}/%{name}/pl_mom.py
-%{_datadir}/%{name}/bwmon.py
-%{_datadir}/%{name}/swapmon.py
-%{_initrddir}/swapmon
-/usr/local/planetlab/bin/pl_mop.sh
-%{_sysconfdir}/cron.d/pl_mom
+%postun
+
+