From 0b94296a0de4e93a3ebb4c0747ea2be4e29ae674 Mon Sep 17 00:00:00 2001 From: Planet-Lab Support Date: Tue, 9 May 2006 03:23:58 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create branch 'planetlab-3_3-branch'. --- Makefile | 31 +-- pl_mom | 74 ++++++++ pl_mom.pl | 532 ++++++++++++++++++++++++++++++++++++++++++++++++++++ pl_mom.spec | 106 +++++------ pl_mop.cron | 10 + pl_mop.sh | 4 +- 6 files changed, 683 insertions(+), 74 deletions(-) create mode 100755 pl_mom create mode 100755 pl_mom.pl create mode 100644 pl_mop.cron diff --git a/Makefile b/Makefile index 9b75c7f..06048a0 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,23 @@ +# @COPYRIGHT@ # -# pl_mom suite of node monitors -# -# Mark Huang -# Copyright (C) 2006 The Trustees of Princeton University -# -# $Id$ -# +# $Id: Makefile,v 1.2 2004/11/17 20:24:11 acb Exp $ + +BASE=pl_mom +VERSION=0.3 +TARFILE=$(BASE)-$(VERSION).tgz + +all: pl_mom.pl pl_mom -ALL := leak +install: + install -D -m 755 pl_mom.pl /usr/local/planetlab/bin/pl_mom.pl + install -D -m 755 pl_mom /etc/init.d/pl_mom -CC := gcc -CFLAGS := -Wall -O2 +tarball: + rm -f $(TARFILE) + tar cvfz $(TARFILE) -C .. --exclude=CVS $(BASE)-$(VERSION) -all: $(ALL) +rpm:: tarball + sudo cp $(TARFILE) /usr/src/redhat/SOURCES + sudo rpmbuild -ba $(BASE).spec -clean: - rm -f $(ALL) -.PHONY: all clean diff --git a/pl_mom b/pl_mom new file mode 100755 index 0000000..a41c481 --- /dev/null +++ b/pl_mom @@ -0,0 +1,74 @@ +#!/bin/sh +# +# chkconfig: 345 98 02 +# description: pl_mom (daemon of death) startup script +# + +CODE='/usr/local/planetlab/bin/pl_mom.pl' +PROC='pl_mom' + +. /etc/rc.d/init.d/functions + +RETVAL=0 + +pidfile=/var/run/$PROC.pid + +check_status() { + pid=`cat $pidfile 2>/dev/null` + # + # this eliminates a race condition between checking existence of pidfile + # and reading its value + # + [ -n "$pid" -a -d /proc/$pid ] +} + +case "$1" in + start) + echo -n "starting $PROC:" + [ -r $CODE ] || action "code missing" /bin/false || exit 1 + pid=`cat $pidfile 2>/dev/null` + if [ -n "$pid" ]; then + # check whether process really exists + # yes - don't try to start + [ -d /proc/$pid ] && action "already running" /bin/true && exit 1 + + # no - PID file is stale + rm -f $pidfile + fi + + $CODE + sleep 1 + + cmd=success + check_status || cmd=failure + $cmd "$PROC startup" + echo + ;; + + stop) + echo -n "shutting down $PROC: " + check_status && kill -TERM -`cat $pidfile` && sleep 1 + cmd=failure + check_status || cmd=success && rm -f $pidfile + $cmd "$PROC shutdown" + RETVAL=0 + echo + ;; + + restart|reload) + $0 stop + $0 start + RETVAL=$? + ;; + + status) + check_status && echo 'running' && exit 0 || \ + echo 'not running' && exit 1 + ;; + + *) + echo "Usage: $0 {start|stop|restart|status}" + exit 1 +esac + +exit $RETVAL diff --git a/pl_mom.pl b/pl_mom.pl new file mode 100755 index 0000000..e16f24c --- /dev/null +++ b/pl_mom.pl @@ -0,0 +1,532 @@ +#!/usr/bin/perl -w + +use POSIX qw(setsid); +use Sys::Syslog; +use Sys::Hostname; +#use LWP::Simple; + +$debug = 0; +$proc = "pl_mom"; +$alias_addr = "pl-mom\@planet-lab.org"; +$from_addr = "support\@planet-lab.org"; + +if (! $debug) { + $kill_thresh = 90; + $reboot_thresh = 95; + $log_thresh = 85; + $change_thresh = 5; + $min_thresh = 10; + #$bwcap_default = "off"; + $bwcap_default = "1.5Mbit"; + $cutoff_default = "16200000000"; # 16GB, for 1.5Mbit cap + $bwmon_sleep = 900; + + $sendmail = "/usr/sbin/sendmail -t -f$from_addr"; + $vservers = "/etc/vservers"; + $pidfile = "/var/run/$proc.pid"; + $rebootfile = "/var/lib/misc/pl_mom.reboot"; + $daily_log = "/var/lib/misc/pl_mom.daily"; + $daily_stamp = "/var/lib/misc/pl_mom.stamp"; + $configfile = "/etc/planetlab/pl_mom.conf"; + $capfile = "/var/lib/misc/pl_mom.oldcaps"; +} else { + $kill_thresh = 2; + $reboot_thresh = 20; + $log_thresh = 2; + $change_thresh = 5; + $min_thresh = 2; + $bwcap_default = "1Kbit"; + $cutoff_default = "10800"; + $bwmon_sleep = 10; + + $sendmail = "cat"; + $vservers = "./debug"; + $pidfile = "./$proc.pid"; + $rebootfile = "./debug/pl_mom.reboot"; + $daily_log = "./debug/pl_mom.daily"; + $daily_stamp = "./debug/pl_mom.stamp"; + $configfile = "./debug/pl_mom.conf"; + $capfile = "./debug/pl_mom.oldcaps"; +} + +$sleep = 30; + +# daemonize the program +if (! $debug) { + &daemonize; +} + +system("echo $$ > $pidfile"); + +read_config_file(); + +# Check to see whether pl_mom rebooted the node +if (-e $rebootfile) { + unlink($rebootfile); + syslog ("warning", "pl_mom: Sending shutdown mail"); + shutdown_mail(); +} + +my $pid = fork(); +if (! $pid) { + syslog ("info", "pl_mom: Launching reboot kicker"); + reboot_kicker(); + die (0); +} +$pid = fork(); +if (! $pid) { + syslog ("info", "pl_mom: Launching bandwidth monitor"); + if ($bwcap_default =~ /off/) { + syslog("info", "pl_mom: Max rate unlimited by default"); + } + reset_bandwidth_caps(); + bandwidth_monitor(); + die (0); +} + +while (1) { + $used = int(swap_used()); + + if (defined($old_used)) { + if ($used >= $old_used + $change_thresh) { + syslog ("info", "pl_mom: %d%% swap consumed in last %d seconds", + $used - $old_used, $sleep); + } + } + + if ($used >= $log_thresh) { + if (! defined($old_used) || $used != $old_used) { + syslog ("info", "pl_mom: Swap used: %d%%", $used); + } + get_slice_info(); + my $hog = memory_hog(); + if ($hog) { + if ($used < $kill_thresh) { + if (! defined($Warning{$hog})) { + $Warning{$hog} = "sent"; + syslog ("warning", "pl_mom: Slice $hog is ". + "using $Slice{$hog}{mem_pct}%% of memory"); + #slice_warning_mail($hog); + } + } else { + my $id = `id -u $hog`; + chomp($id); + my $top = `/usr/sbin/chcontext --ctx $id /usr/bin/top -b -n 1`; + syslog ("warning", "pl_mom: Resetting slice $hog"); + if (! $debug) { + slice_reset($hog); + } + syslog ("warning", "pl_mom: Sending mail to slice $hog"); + slice_reset_mail($hog, $top); + } + } + } + + sleep ($sleep); + + $old_used = $used; +} + +sub reboot_kicker { + while (1) { + $used = swap_used(); + + if ($used >= $reboot_thresh) { + syslog ("warning", "pl_mom: Rebooting node"); + + system("touch $rebootfile"); + if (! $debug) { + #system("shutdown -r now"); + system("/bin/sync; /sbin/reboot -f"); + } + die (0); + } + + sleep (1); + } +} + +sub bandwidth_monitor { + while (1) { + # See if a new day has started for bandwidth monitoring + chomp($now = `date -u +%D`); + if (-e $daily_stamp) { + chomp($stamp = `cat $daily_stamp`); + } + if (! defined($stamp) || !($stamp =~ $now)) { + open (STAMP, ">$daily_stamp") || + die "Can't open file $daily_stamp for writing: $!\n"; + print STAMP "$now\n"; + close STAMP; + unlink ($daily_log); + + # Could save the list of capped slices in a file in order to + # avoid re-sending mails if the daemon restarts. + # Also may want a list of slices that are exempt from capping. + if (defined(%Start)) { undef %Start; } + if (defined(%Now)) { undef %Now; } + if (defined(%Cap)) { undef %Cap; } + + reset_bandwidth_caps(); + + syslog("info", "pl_mom: Beginning bandwidth monitoring for $now"); + } + + get_slice_names(); + get_baseline_counts(); + get_slice_limits(); + + foreach $slice ( sort (keys %Start) ) { + if (defined $Now{$slice}) { + $today = $Now{$slice} - $Start{$slice}; + if (! (defined ($Cutoff{$slice})||$bwcap_default =~ /off/)) { + $Cutoff{$slice} = $cutoff_default; + $Maxrate{$slice} = $bwcap_default; + } + if ($debug) { + if ($today) { + $cutoff = defined($Cutoff{$slice}) + ? $Cutoff{$slice} : ""; + print "Slice $slice sent $today bytes; ". + "cutoff $cutoff\n"; + } + } + if (defined ($Cutoff{$slice}) && + $today >= $Cutoff{$slice} && + ! defined($Cap{$slice})) { + $Cap{$slice} = "sent"; + bw_cap_mail($slice); + log_bandwidth_cap($slice, $Maxrate{$slice}); + cap_bandwidth($slice, $Maxrate{$slice}); + } + } else { + # Token bucket for this slice is gone! + } + } + + sleep($bwmon_sleep); + } +} + +sub read_config_file { + if (-e $configfile) { + open (CONFIG, "<$configfile") || + print "Cannot open $configfile; $!\n"; + while () { + if (m/^(.*)=(.*)$/) { + ${$1} = $2; + if ($debug) { + print "read_config_file: $1 = ${$1}\n"; + } + } + } + close CONFIG; + } +} + +sub get_slice_names { + # Read slice names from /etc/passwd + if (defined (%Name)) { undef %Name; } + open (PASSWD, ") { + my ($slicename, $passwd, $sliceid) = split(/:/); + $Name{$sliceid} = $slicename; + } + close PASSWD; +} + +sub get_baseline_counts { + `touch $daily_log`; + open (BASE, "+<$daily_log") || + print "Cannot open $daily_log; $!\n"; + while () { + my ($slice, $bytecount) = split(/ /); + $Start{$slice} = $bytecount; + } + + my $status = `tc -s -d qdisc show`; + my $sliceid = 0xffff; + @Lines = split(/\n/, $status); + foreach $line ( @Lines ) { + if ($line =~ /qdisc pfifo (.*): dev/) { + $sliceid = hex($1); + # "Capped" buckets all begin with 0x1000. Ignore the root + # (0x1000) and default (0x1fff) buckets, as well as + # "exempt" buckets that begin with 0x2000 (or anything + # other than 0x1000). + if (($sliceid & 0xf000) == 0x1000 && + $sliceid != 0x1000 && $sliceid != 0x1fff) { + $sliceid = $sliceid & 0x0fff; + } else { + $sliceid = 0xffff; + } + } else { + if ($line =~ /Sent (.*) bytes/) { + my $bytes = $1; + if ($sliceid != 0xffff) { + my $slice = $Name{$sliceid}; + if ($debug && $bytes) { + print "Slice: $slice ($sliceid), bytes $bytes\n"; + } + if (! defined($Start{$slice})) { + print BASE "$slice $bytes\n"; + $Start{$slice} = $bytes; + } + $Now{$slice} = $bytes; + } + } + } + } + close (BASE); +} + +sub get_slice_limits { + if (defined %Maxrate) { undef %Maxrate; } + if (defined %Cutoff) { undef %Cutoff; } + if (-e $vservers) { + my $result = `grep -H "^BWAVGRATE" $vservers/*.conf`; + chomp ($result); + my @Lines = split(/\n/,$result); + foreach $line ( @Lines ) { + if ($line =~ /\/([^\/]*).conf:BWAVGRATE=(.*)[Mm]bit/) { + $slice = $1; + $limit = $2."Mbit"; + $cutoff = ($2 * 1000000 * 86400)/8; + } else { + if ($line =~ /\/([^\/]*).conf:BWAVGRATE=(.*)[Kk]bit/) { + $slice = $1; + $limit = $2."Kbit"; + $cutoff = ($2 * 1000 * 86400)/8; + } else { + die "Could not parse line $line"; + } + } + $Maxrate{$slice} = $limit; + $Cutoff{$slice} = $cutoff; + } + } +} + +sub reset_bandwidth_caps { + if (-e $capfile) { + open(CAP, "<$capfile") or die "Cannot open $capfile: $!"; + while () { + chomp(); + ($slicename, $oldcap) = split(/ /); + syslog("info", "pl_mom: Restoring bandwidth cap of $oldcap ". + "to $slicename"); + cap_bandwidth ($slicename, $oldcap); + } + close CAP; + unlink($capfile); + } +} + +sub log_bandwidth_cap { + ($slicename, $cap) = @_; + syslog("warning", "pl_mom: Capping bandwidth of slice ". + "$slicename at $cap until midnight GMT."); + # Save current cap to $capfile + system("echo $slicename `bwlimit getcap $slicename` >> $capfile"); +} + +sub send_mail { + # Arg 0: recipient addresses, comma-separated string + # Arg 1: subject line + # Arg 2: body of message + my $to = "To: $_[0]\n"; + my $from = "From: $from_addr\n"; + my $subject = "Subject: $_[1]\n"; + my $msg = $_[2]; + + if ($debug) { + print $to; + print $subject; + } else { + open(SENDMAIL, "|$sendmail") or die "Cannot open $sendmail: $!"; + print SENDMAIL $to; + print SENDMAIL $from; + print SENDMAIL $subject; + print SENDMAIL "Content-type: text/plain\n\n"; + print SENDMAIL $msg; + close(SENDMAIL); + } +} + +sub cap_bandwidth { + ($slicename, $cap) = @_; + system("bwlimit setcap $slicename $cap"); + system("bwlimit on $slicename"); +} + +sub get_date { + my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdat) + = localtime(time); + my $date = sprintf("%4d-%02d-%02d %02d:%02d:%02d", + $year+1900, $mon+1, $mday, $hour, $min, $sec); + return $date; +} + +sub bw_cap_mail { + my ($slicename) = @_; + my $hostname = hostname(); + my $date = get_date(); + my $sent = int($Cutoff{$slicename}/(1024*1024)); + my $bwcap = $Maxrate{$slicename}; + + send_mail("$alias_addr, $slicename\@slices.planet-lab.org", + "$proc capped bandwidth of slice $slicename on $hostname", + "Slice $slicename has transmitted more than ${sent}MB today". + " on $hostname. ". + "Its bandwidth will be capped at $bwcap until midnight GMT.". + "\n\n$date $hostname bwcap $slicename\n"); +} + +sub shutdown_mail { + my $hostname = hostname(); + my $date = get_date(); + send_mail($alias_addr, + "$proc rebooted $hostname", + "Swap space was exhausted on $hostname and so $proc rebooted ". + "it.\n\nAs of $date, the node has successfully come back ". + "online.\n\n$date $hostname reboot\n"); +} + +sub slice_reset_mail { + my $hog = $_[0]; + my $top = $_[1]; + my $hog_mem = sprintf ("%0.f", $Slice{$hog}{pmem}/1000); + my $hog_pct = $Slice{$hog}{mem_pct}; + my $hostname = hostname(); + my $date = get_date(); + send_mail("$alias_addr, $hog\@slices.planet-lab.org", + "$proc reset slice $hog on $hostname", + "As of $date, swap space is nearly exhausted on $hostname.\n\n". + "Slice $hog is being reset since it is the largest consumer ". + "of physical memory at ${hog_mem}MB ($hog_pct%).\n\n". + "Please reply to this message explaining the nature of your ". + "experiment, and what you are doing to address the problem.\n". + "\nOutput of 'top -b -n 1' in your slice prior to reset:\n". + "$top\n\n$date $hostname reset $hog\n"); +} + +sub slice_warning_mail { + my $hog = $_[0]; + my $hog_mem = sprintf ("%0.f", $Slice{$hog}{pmem}/1000); + my $hog_pct = $Slice{$hog}{mem_pct}; + my $hostname = hostname(); + my $date = get_date(); + + if ($hog =~ /^root$/) { + $to = $alias_addr; + } else { + $to = "$alias_addr, $hog\@slices.planet-lab.org"; + } + + send_mail($to, + "$proc may reset slice $hog on $hostname", + "As of $date, swap space is over $log_thresh% full on ". + "$hostname.\n\nSlice $hog is the largest consumer ". + "of physical memory at ${hog_mem}MB ($hog_pct%).\n". + "Please check the memory usage of your slice to avoid a ". + "reset.\n\n$date $hostname warning $hog\n"); +} + +sub unkillable_alarm_mail { + my $hog = $_[0]; + my $hog_mem = sprintf ("%0.f", $Slice{$hog}{pmem}/1000); + my $hog_pct = $Slice{$hog}{mem_pct}; + my $hostname = hostname(); + my $date = get_date(); + + if ($hog =~ /^root$/) { + $to = $alias_addr; + } else { + $to = "$alias_addr, $hog\@slices.planet-lab.org"; + } + + send_mail($to, + "$proc: alarm for slice $hog on $hostname", + "As of $date, swap space is over $log_thresh% full on ". + "$hostname.\n\nSlice $hog is the largest consumer ". + "of physical memory at ${hog_mem}MB ($hog_pct%).\n". + "The slice will not be reset, but please verify its behavior.\n". + "\n$date $hostname alarm $hog\n"); +} + +sub slice_reset { + my $slice = $_[0]; + my $sliceid = $Slice{$slice}{ctx}; + system("chcontext --ctx $sliceid sudo kill -9 -1"); + system("/etc/init.d/vserver-init start $slice"); +} + +sub swap_used { + open (SWAP, "; + $line = ; + $line =~ s/[\t ]+/ /g; + + my ($filename, $type, $size, $used, $priority) = split(/ /, $line); + close SWAP; + + return 100*($used/$size); +} + +sub get_slice_info { + if (! $debug) { + $content = `curl -s http://127.0.0.1:3100/slicestat`; + } else { + #$content = `cat ../pl_mom-deploy/slicestat` + $content = `curl -s http://127.0.0.1:3100/slicestat`; + } + my @lines = split(/\n/, $content); + %Slice = (); + foreach $line (@lines) { + my ($slice, $ctx, $cpu_pct, $mem_pct, $pmem, $vmem, $ntasks) + = split(/,/,$line); + $Slice{$slice}{ctx} = $ctx; + $Slice{$slice}{cpu_pct} = $cpu_pct; + $Slice{$slice}{mem_pct} = $mem_pct; + $Slice{$slice}{pmem} = $pmem; + $Slice{$slice}{vmem} = $vmem; + $Slice{$slice}{ntasks} = $ntasks; + } +} + +sub memory_hog { + @keys = sort { $Slice{$b}{mem_pct} <=> $Slice{$a}{mem_pct} } (keys %Slice); + foreach $key (@keys) { + if ($Slice{$key}{mem_pct} >= $min_thresh) { + if ($key =~ /^root$/ || $key =~ /slicestat/ || $key =~ /netflow/) { + if (! defined ($Warning{$key})) { + $Warning{$key} = "sent"; + syslog ("warning", "pl_mom: Sending alarm mail to ". + "unkillable slice $key, using ". + "$Slice{$key}{mem_pct}%% of memory"); + unkillable_alarm_mail($key); + } + } else { + return $key; + } + } else { + #syslog ("info", "pl_mom: No killable slice using > ". + # "$min_thresh%% memory"); + return; + } + } +} + +sub daemonize { + chdir '/' or die "Can't chdir to /: $!"; + open STDIN, '/dev/null' or die "Can't read /dev/null: $!"; + open STDOUT, '>>/dev/null' or die "Can't write to /dev/null: $!"; + open STDERR, '>>/dev/null' or die "Can't write to /dev/null: $!"; + defined(my $pid = fork) or die "Can't fork: $!"; + exit if $pid; + setsid or die "Can't start a new session: $!"; + umask 0; +} diff --git a/pl_mom.spec b/pl_mom.spec index 2af45ec..4d3fd08 100644 --- a/pl_mom.spec +++ b/pl_mom.spec @@ -1,12 +1,12 @@ %define name pl_mom -%define version 0.4 -%define release 1%{?pldistro:.%{pldistro}}%{?date:.%{date}} +%define version 0.3 +%define release 15%{?pldistro:.%{pldistro}}%{?date:.%{date}} -Summary: PlanetLab node monitoring tools +Summary: PlanetLab mom -- Cleans up your mess Name: %{name} Version: %{version} Release: %{release} -License: GPL +License: dontknow Group: System Environment/Kernel Source: %{name}-%{version}.tgz Vendor: PlanetLab @@ -15,83 +15,73 @@ Distribution: PlanetLab 3.0 URL: http://cvs.planet-lab.org/cvs/pl_mom BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) Requires: expect -# swapmon requires vps and bwlimit.py -Requires: util-vserver, util-vserver-python -# bwmon requires tc -Requires: iproute %description -pl_mom is a suite of PlanetLab node monitoring tools. -swapmon is a swap monitoring daemon. Every 30 seconds, it checks -process memory usage. At 90% utilization, resets the slice that is -consuming the most physical memory. At 95% utilization, it reboots the -machine to avoid a crash. +A small daemon that watches the consumed swap space. At 90% utilization, it +resets the slice that is the biggest memory hog. At 95% utilization, it +reboots the machine. -bwmon is a cron job that monitors the average bandwidth usage of each -slice and enforces a daily byte limit for each slice. - -pl_mop is a cron job that "fixes" various common problems with nodes -(dead services, ext3 corruption, zombie SSH sessions, etc.). +A cron job which "fixes" various common problems with nodes (dead +services, ext3 corruption, zombie SSH sessions) is also installed. %prep -%setup -q + +%setup %build %install -rm -rf $RPM_BUILD_ROOT +mkdir -p $RPM_BUILD_ROOT/usr/local/planetlab/bin/ +mkdir -p $RPM_BUILD_ROOT/etc/init.d/ -# Utility functions -install -D -m 644 pl_mom.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/pl_mom.py +cp pl_mom $RPM_BUILD_ROOT/etc/init.d/ +cp pl_mom.pl $RPM_BUILD_ROOT/usr/local/planetlab/bin/ +cp pl_mop.sh $RPM_BUILD_ROOT/usr/local/planetlab/bin/ -# Bandwidth monitor (bwmon), run periodically -install -D -m 755 bwmon.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/bwmon.py +install -D -m 644 pl_mop.cron $RPM_BUILD_ROOT/etc/cron.d/pl_mop -# Swap monitoring daemon (swapmon) -install -D -m 755 swapmon.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/swapmon.py -install -D -m 755 swapmon.init $RPM_BUILD_ROOT/%{_initrddir}/swapmon +%clean +[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT -# Cleanup script -install -D -m 755 pl_mop.sh $RPM_BUILD_ROOT/usr/local/planetlab/bin/pl_mop.sh +%files +%defattr(0755, root, root) +/etc/init.d/pl_mom +/usr/local/planetlab/bin/pl_mom.pl +/usr/local/planetlab/bin/pl_mop.sh +/etc/cron.d/pl_mop -# Runs pl_mop and bwmon periodically -install -D -m 644 pl_mom.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/pl_mom +%pre -%clean -rm -rf $RPM_BUILD_ROOT %post -chkconfig --add swapmon -chkconfig swapmon on -if [ "$PL_BOOTCD" != "1" ] ; then - service swapmon restart -fi +if [ "$1" -ge 1 ]; then -# Randomize pl_mop run time -M=$((60 * $RANDOM / 32768)) -H=$((24 * $RANDOM / 32768)) -sed -i -e "s/@M@/$M/" -e "s/@H@/$H/" %{_sysconfdir}/cron.d/pl_mom + chkconfig --add pl_mom + chkconfig --level 3 pl_mom on -exit 0 + if [[ "$PL_BOOTCD" != "1" ]]; then + /etc/init.d/pl_mom stop + /etc/init.d/pl_mom start + fi + + # Randomize pl_mop run time + M=$((60 * $RANDOM / 32768)) + H=$((24 * $RANDOM / 32768)) + sed -i -e "s/@M@/$M/" -e "s/@H@/$H/" /etc/cron.d/pl_mop +fi %preun -# 0 = erase, 1 = upgrade if [ "$1" -eq 0 ]; then - if [ "$PL_BOOTCD" != "1" ] ; then - service swapmon stop - fi - chkconfig swapmon off - chkconfig --del swapmon + if [[ "$PL_BOOTCD" != "1" ]]; then + /etc/init.d/pl_mom stop + fi + + chkconfig --del pl_mom + chkconfig pl_mom off fi -exit 0 -%files -%defattr(-, root, root, -) -%{_datadir}/%{name}/pl_mom.py -%{_datadir}/%{name}/bwmon.py -%{_datadir}/%{name}/swapmon.py -%{_initrddir}/swapmon -/usr/local/planetlab/bin/pl_mop.sh -%{_sysconfdir}/cron.d/pl_mom +%postun + + diff --git a/pl_mop.cron b/pl_mop.cron new file mode 100644 index 0000000..6f924f1 --- /dev/null +++ b/pl_mop.cron @@ -0,0 +1,10 @@ +# +# Runs once a day to "fix" nodes in various ways +# +# Mark Huang +# Copyright (C) 2005 The Trustees of Princeton University +# +# $Id: plb.cron,v 1.1 2005/09/27 23:55:02 mlh-pl_rock Exp $ +# + +@M@ @H@ * * * root /usr/local/planetlab/bin/pl_mop.sh diff --git a/pl_mop.sh b/pl_mop.sh index edc8222..5ac5546 100755 --- a/pl_mop.sh +++ b/pl_mop.sh @@ -5,7 +5,7 @@ # Mark Huang # Copyright (C) 2005 The Trustees of Princeton University # -# $Id: pl_mop.sh,v 1.5 2006/03/06 20:40:33 mlhuang Exp $ +# $Id: pl_mop.sh,v 1.4 2006/01/26 19:26:20 mlhuang Exp $ # PATH=/sbin:/usr/sbin:$PATH @@ -46,7 +46,7 @@ fix_etc_shadow() { # keep essential services running restart_services() { - for service in sshd pl_sshd swapmon pl_nm proper ; do + for service in sshd pl_sshd pl_mom pl_nm proper ; do echo "* Checking $service" status=$(service $service status) if [ $? -ne 0 ] || echo $status 2>&1 | grep -q stopped ; then -- 2.43.0