X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=pl_mom.pl;h=ecab7bfc8fa42b8d7d191b5bdfd320b764550147;hb=56223fd521ba496f961baf743698ef2cfaebf76e;hp=55f8c9ea02423b6ca849237ef3cd5ab56ef226d9;hpb=cffca65277f8d696f358e2b9e9f3fc77da07a007;p=mom.git diff --git a/pl_mom.pl b/pl_mom.pl index 55f8c9e..ecab7bf 100755 --- a/pl_mom.pl +++ b/pl_mom.pl @@ -5,11 +5,12 @@ use Sys::Syslog; use Sys::Hostname; #use LWP::Simple; -$debug = 0; +$debug = 1; $proc = "pl_mom"; $alias_addr = "pl-mom\@planet-lab.org"; $from_addr = "support\@planet-lab.org"; -$bwcap = "1.5Mbit"; +#$bwcap = "1.5Mbit"; +$bwcap = "off"; if (! $debug) { $kill_thresh = 90; @@ -17,8 +18,7 @@ if (! $debug) { $log_thresh = 85; $change_thresh = 5; $min_thresh = 10; - #$byte_cutoff = 16000000000; # 16GB - $byte_cutoff = 5000000000; # 5GB + $byte_cutoff = 16000000000; # 16GB $bwmon_sleep = 900; $sendmail = "/usr/sbin/sendmail -t -f$from_addr"; @@ -26,6 +26,8 @@ if (! $debug) { $rebootfile = "/var/lib/misc/pl_mom.reboot"; $daily_log = "/var/lib/misc/pl_mom.daily"; $daily_stamp = "/var/lib/misc/pl_mom.stamp"; + $configfile = "/etc/planetlab/pl_mom.conf"; + $capfile = "/var/lib/misc/pl_mom.oldcaps"; } else { $kill_thresh = 2; $reboot_thresh = 20; @@ -40,6 +42,8 @@ if (! $debug) { $rebootfile = "./pl_mom.reboot"; $daily_log = "./pl_mom.daily"; $daily_stamp = "./pl_mom.stamp"; + $configfile = "./pl_mom.conf"; + $capfile = "./pl_mom.oldcaps"; } $sleep = 30; @@ -51,6 +55,8 @@ if (! $debug) { system("echo $$ > $pidfile"); +read_config_file(); + # Check to see whether pl_mom rebooted the node if (-e $rebootfile) { unlink($rebootfile); @@ -67,6 +73,10 @@ if (! $pid) { $pid = fork(); if (! $pid) { syslog ("info", "pl_mom: Launching bandwidth monitor"); + if ($bwcap =~ /off/) { + syslog("info", "pl_mom: Bandwidth capping is off"); + } + reset_bandwidth_caps(); bandwidth_monitor(); die (0); } @@ -98,7 +108,7 @@ while (1) { } else { my $id = `id -u $hog`; chomp($id); - my $top = `chcontext --ctx $id top -b -n 1`; + my $top = `/usr/sbin/chcontext --ctx $id /usr/bin/top -b -n 1`; syslog ("warning", "pl_mom: Resetting slice $hog"); if (! $debug) { slice_reset($hog); @@ -123,7 +133,8 @@ sub reboot_kicker { system("touch $rebootfile"); if (! $debug) { - system("shutdown -r now"); + #system("shutdown -r now"); + system("/bin/sync; /sbin/reboot -f"); } die (0); } @@ -149,11 +160,11 @@ sub bandwidth_monitor { # Could save the list of capped slices in a file in order to # avoid re-sending mails if the daemon restarts. # Also may want a list of slices that are exempt from capping. - if (defined(%Cap)) { - undef %Cap; - } + if (defined(%Start)) { undef %Start; } + if (defined(%Now)) { undef %Now; } + if (defined(%Cap)) { undef %Cap; } - # Need to reset bandwidth limits here + reset_bandwidth_caps(); syslog("info", "pl_mom: Beginning bandwidth monitoring for $now"); } @@ -191,23 +202,26 @@ sub bandwidth_monitor { } } close (BASE); - - foreach $slice ( sort (keys %Start) ) { - if (defined $Now{$slice}) { - $today = $Now{$slice} - $Start{$slice}; - if ($today >= $byte_cutoff && ! defined($Cap{$slice})) { - $Cap{$slice} = "sent"; - $slicename = get_slice_name($slice); - if ($slicename) { - bw_cap_mail($slicename); - } else { - syslog("warning", "pl_mom: Could not find slice name". - " for slice ID $slice"); + + if (!($bwcap =~ /off/)) { + foreach $slice ( sort (keys %Start) ) { + if (defined $Now{$slice}) { + $today = $Now{$slice} - $Start{$slice}; + if ($today >= $byte_cutoff && ! defined($Cap{$slice})) { + $Cap{$slice} = "sent"; + $slicename = get_slice_name($slice); + if ($slicename) { + bw_cap_mail($slicename); + log_bandwidth_cap($slicename, $bwcap); + cap_bandwidth($slicename, $bwcap); + } else { + syslog("warning", "pl_mom: Could not find slice ". + "name for slice ID $slice"); + } } - # Add bandwidth cap here + } else { + # Token bucket for this slice is gone! } - } else { - # The /proc/virtual/ directory is gone... } } @@ -215,6 +229,45 @@ sub bandwidth_monitor { } } +sub read_config_file { + if (-e $configfile) { + open (CONFIG, "<$configfile") || + print "Cannot open $configfile; $!\n"; + while () { + if (m/^(.*)=(.*)$/) { + ${$1} = $2; + if ($debug) { + print "read_config_file: $1 = ${$1}\n"; + } + } + } + close CONFIG; + } +} + +sub reset_bandwidth_caps { + if (-e $capfile) { + open(CAP, "<$capfile") or die "Cannot open $capfile: $!"; + while () { + chomp(); + ($slicename, $oldcap) = split(/ /); + syslog("info", "pl_mom: Restoring bandwidth cap of $oldcap ". + "to $slicename"); + cap_bandwidth ($slicename, $oldcap); + } + close CAP; + unlink($capfile); + } +} + +sub log_bandwidth_cap { + ($slicename, $cap) = @_; + syslog("warning", "pl_mom: Capping bandwidth of slice ". + "$slicename at $cap until midnight GMT."); + # Save current cap to $capfile + system("echo $slicename `bwlimit getcap $slicename` >> $capfile"); +} + sub send_mail { # Arg 0: recipient addresses, comma-separated string # Arg 1: subject line @@ -233,6 +286,12 @@ sub send_mail { close(SENDMAIL); } +sub cap_bandwidth { + ($slicename, $cap) = @_; + system("bwlimit setcap $slicename $cap"); + system("bwlimit on $slicename"); +} + sub get_date { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdat) = localtime(time); @@ -264,11 +323,7 @@ sub bw_cap_mail { my $date = get_date(); my $sent = int($byte_cutoff/1000000000); - # Put this here because this is where we have the - syslog("warning", "pl_mom: Capping bandwidth of slice ". - "$slicename at $bwcap until midnight GMT."); - - send_mail("acb\@planet-lab.org", + send_mail("$alias_addr, $slicename\@slices.planet-lab.org", "$proc capped bandwidth of slice $slicename on $hostname", "Slice $slicename has transmitted more than ${sent}GB today". " on $hostname. ". @@ -371,10 +426,10 @@ sub swap_used { sub get_slice_info { if (! $debug) { - #$content = get "http://127.0.0.1:3100/slicestat"; $content = `curl -s http://127.0.0.1:3100/slicestat`; } else { - $content = `cat ../pl_mom-deploy/slicestat` + #$content = `cat ../pl_mom-deploy/slicestat` + $content = `curl -s http://127.0.0.1:3100/slicestat`; } my @lines = split(/\n/, $content); %Slice = ();